From f4e642240c9238cf32a13b8ff47349f68230d3e3 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Thu, 9 Apr 2026 15:22:55 -0500 Subject: [PATCH] Add direct BLM handler for Link Building build stage Replace Claude-based link building with direct subprocess calls to Big-Link-Man CLI. The build stage now runs ingest-cora + generate-batch deterministically without LLM involvement. - New clickup_runner/blm.py: BLM subprocess runner (ported from old linkbuilding.py) - New handler="blm" in skill_map for Link Building build stage - Add BLMConfig to config.py (blm_dir, cora_inbox, timeout) - Add task-specific fields (Keyword, CLIFlags, etc.) to build_prompt - Strip YAML frontmatter from skill files in read_skill_file - Skip orphaned AutoCora results with no state DB entry Co-Authored-By: Claude Opus 4.6 (1M context) --- clickup_runner/__main__.py | 117 +++++++++ clickup_runner/blm.py | 262 ++++++++++++++++++++ clickup_runner/claude_runner.py | 21 ++ clickup_runner/config.py | 10 +- clickup_runner/skill_map.py | 4 +- skills/runner_linkbuilding.md | 69 ++++++ tests/test_clickup_runner/test_skill_map.py | 3 +- 7 files changed, 480 insertions(+), 6 deletions(-) create mode 100644 clickup_runner/blm.py create mode 100644 skills/runner_linkbuilding.md diff --git a/clickup_runner/__main__.py b/clickup_runner/__main__.py index 2565a43..604b58c 100644 --- a/clickup_runner/__main__.py +++ b/clickup_runner/__main__.py @@ -15,6 +15,7 @@ from datetime import datetime, timezone from pathlib import Path from .autocora import archive_result, scan_results, submit_job +from .blm import find_cora_xlsx, run_generate, run_ingest from .claude_runner import ( RunResult, build_prompt, @@ -204,6 +205,8 @@ def poll_cycle( if route.handler == "autocora": _dispatch_autocora(client, cfg, db, task, route, run_id) + elif route.handler == "blm": + _dispatch_blm(client, cfg, db, task, route, run_id) else: _dispatch_claude(client, cfg, db, task, route, run_id) @@ -434,6 +437,120 @@ def _dispatch_autocora( ) 
+def _dispatch_blm( + client: ClickUpClient, + cfg: Config, + db: StateDB, + task: ClickUpTask, + route: SkillRoute, + run_id: int, +): + """Run BLM ingest-cora + generate-batch directly (no Claude needed).""" + keyword = task.get_field_value("Keyword") or "" + url = task.get_field_value("IMSURL") or "" + cli_flags = task.get_field_value("CLIFlags") or "" + bp_ratio = task.get_field_value("BrandedPlusRatio") or "" + custom_anchors = task.get_field_value("CustomAnchors") or "" + + if not keyword: + _handle_dispatch_error( + client, cfg, db, task, run_id, + error="Missing Keyword field", + fix="Set the Keyword custom field, then re-check Delegate to Claude.", + ) + return + + # 1. Set status to AI Working + client.update_task_status(task.id, cfg.clickup.ai_working_status) + + # 2. Find the Cora xlsx + xlsx_path = find_cora_xlsx(keyword, cfg.blm.cora_inbox) + if not xlsx_path: + _handle_dispatch_error( + client, cfg, db, task, run_id, + error="No Cora xlsx found for keyword '%s' in %s" + % (keyword, cfg.blm.cora_inbox), + fix="Check that the Cora report exists in %s, then re-check Delegate to Claude." + % cfg.blm.cora_inbox, + ) + return + + log.info("Found Cora xlsx: %s", xlsx_path) + client.add_comment(task.id, "Starting BLM pipeline for '%s'.\nCora file: %s" % (keyword, xlsx_path)) + + # 3. 
Run ingest-cora + log.info("Running ingest-cora for task %s (keyword=%s)", task.id, keyword) + ingest = run_ingest( + xlsx_path=xlsx_path, + keyword=keyword, + money_site_url=url, + blm_dir=cfg.blm.blm_dir, + timeout=cfg.blm.timeout_seconds, + branded_plus_ratio=bp_ratio, + custom_anchors=custom_anchors, + cli_flags=cli_flags, + ) + + if not ingest.success: + _handle_dispatch_error( + client, cfg, db, task, run_id, + error="ingest-cora failed: %s" % ingest.error, + fix="Check BLM logs, fix the issue, then re-check Delegate to Claude.", + ) + return + + log.info( + "ingest-cora OK: project=%s (ID=%s), job_file=%s", + ingest.project_name, ingest.project_id, ingest.job_file, + ) + + # 4. Run generate-batch + log.info("Running generate-batch for task %s (job=%s)", task.id, ingest.job_file) + gen = run_generate( + job_file=ingest.job_file, + blm_dir=cfg.blm.blm_dir, + timeout=cfg.blm.timeout_seconds, + ) + + if not gen.success: + _handle_dispatch_error( + client, cfg, db, task, run_id, + error="generate-batch failed: %s" % gen.error, + fix="Check BLM logs, fix the issue, then re-check Delegate to Claude.", + ) + return + + log.info("generate-batch OK: job moved to %s", gen.job_moved_to) + + # 5. Advance stage + post summary + client.set_stage( + task.id, task.list_id, route.next_stage, cfg.clickup.stage_field_name + ) + client.update_task_status(task.id, route.next_status) + + summary = ( + "BLM pipeline completed for '%s'.\n\n" + "- Project: %s (ID: %s)\n" + "- Keyword: %s\n" + "- Job file: %s" + ) % (keyword, ingest.project_name, ingest.project_id, + ingest.main_keyword, gen.job_moved_to or ingest.job_file) + + client.add_comment(task.id, summary) + + # 6. 
Clear error + uncheck delegate + client.set_checkbox( + task.id, task.list_id, cfg.clickup.error_field_name, False + ) + client.set_checkbox( + task.id, task.list_id, cfg.clickup.delegate_field_name, False + ) + + db.log_run_finish(run_id, "completed", result="BLM pipeline done") + notify(cfg, "BLM done: %s" % keyword, "Task %s completed" % task.id) + log.info("BLM pipeline completed for task %s (keyword=%s)", task.id, keyword) + + def _download_attachments( client: ClickUpClient, task: ClickUpTask, diff --git a/clickup_runner/blm.py b/clickup_runner/blm.py new file mode 100644 index 0000000..5864768 --- /dev/null +++ b/clickup_runner/blm.py @@ -0,0 +1,262 @@ +"""Big-Link-Man CLI runner. + +Runs ingest-cora and generate-batch via BLM's own venv Python. +Ported from cheddahbot/tools/linkbuilding.py for headless use. +""" + +from __future__ import annotations + +import logging +import os +import re +import subprocess +from dataclasses import dataclass +from pathlib import Path + +log = logging.getLogger(__name__) + + +@dataclass +class IngestResult: + """Parsed output from ingest-cora.""" + + success: bool + project_id: str = "" + project_name: str = "" + main_keyword: str = "" + job_file: str = "" + error: str = "" + + +@dataclass +class GenerateResult: + """Parsed output from generate-batch.""" + + success: bool + job_moved_to: str = "" + error: str = "" + + +def _resolve_venv_python(blm_dir: str) -> Path: + """Find BLM's venv Python executable.""" + venv_python = Path(blm_dir) / ".venv" / "Scripts" / "python.exe" + if not venv_python.exists(): + # Fallback for Linux/Mac + venv_python = Path(blm_dir) / ".venv" / "bin" / "python" + if not venv_python.exists(): + raise FileNotFoundError( + "No .venv found in %s. BLM must have its own venv." 
% blm_dir + ) + return venv_python + + +def _run_blm( + args: list[str], blm_dir: str, timeout: int = 1800 +) -> subprocess.CompletedProcess: + """Run a BLM CLI command with credential injection.""" + venv_python = _resolve_venv_python(blm_dir) + cmd = [str(venv_python), "main.py"] + args + + # Inject credentials from env vars + username = os.getenv("BLM_USERNAME", "") + password = os.getenv("BLM_PASSWORD", "") + if username and "-u" not in args and "--username" not in args: + cmd.extend(["-u", username]) + if password and "-p" not in args and "--password" not in args: + cmd.extend(["-p", password]) + + log.info("BLM command: %s (cwd=%s)", " ".join(cmd), blm_dir) + result = subprocess.run( + cmd, + cwd=blm_dir, + capture_output=True, + text=True, + timeout=timeout, + ) + log.info("BLM exit code: %d", result.returncode) + if result.stdout: + log.debug("BLM stdout: %s", result.stdout[:2000]) + if result.stderr: + log.debug("BLM stderr: %s", result.stderr[:2000]) + return result + + +def find_cora_xlsx(keyword: str, cora_inbox: str) -> str | None: + """Find the Cora xlsx in the inbox directory by keyword match. + + Looks for files whose name (slugified) matches the keyword. + Returns the full path or None. 
+ """ + inbox = Path(cora_inbox) + if not inbox.exists(): + log.warning("Cora inbox not found: %s", cora_inbox) + return None + + # Slugify keyword for matching: lowercase, spaces -> underscores + slug = keyword.lower().strip().replace(" ", "_") + slug = re.sub(r"[^a-z0-9_]", "", slug) + + # Look for exact match first, then prefix match + for xlsx in sorted(inbox.glob("*.xlsx"), key=lambda p: p.stat().st_mtime, reverse=True): + name_lower = xlsx.stem.lower() + if name_lower == slug: + return str(xlsx) + + # Prefix match (keyword slug is prefix of filename) + for xlsx in sorted(inbox.glob("*.xlsx"), key=lambda p: p.stat().st_mtime, reverse=True): + name_lower = xlsx.stem.lower() + if name_lower.startswith(slug): + return str(xlsx) + + log.warning("No xlsx matching '%s' in %s", keyword, cora_inbox) + return None + + +def build_ingest_args( + xlsx_path: str, + project_name: str, + money_site_url: str = "", + branded_plus_ratio: str = "", + custom_anchors: str = "", + cli_flags: str = "", +) -> list[str]: + """Build the ingest-cora CLI argument list.""" + args = ["ingest-cora", "-f", xlsx_path, "-n", project_name] + + if money_site_url: + args.extend(["-m", money_site_url]) + + if branded_plus_ratio: + try: + bp = float(branded_plus_ratio) + if bp != 0.7: + args.extend(["-bp", str(bp)]) + except (ValueError, TypeError): + pass + + if custom_anchors: + args.extend(["-a", custom_anchors]) + + if cli_flags: + args.extend(cli_flags.strip().split()) + + return args + + +def parse_ingest_output(stdout: str) -> IngestResult: + """Parse ingest-cora stdout.""" + result = IngestResult(success=False) + + for line in stdout.splitlines(): + line = line.strip() + + m = re.match(r"^Success: Project '(.+)' created \(ID: (\d+)\)$", line) + if m: + result.project_name = m.group(1) + result.project_id = m.group(2) + result.success = True + continue + + m = re.match(r"^Job file created: (.+)$", line) + if m: + result.job_file = m.group(1).strip() + continue + + m = re.match(r"^Main 
Keyword: (.+)$", line) + if m: + result.main_keyword = m.group(1).strip() + continue + + return result + + +def parse_generate_output(stdout: str) -> GenerateResult: + """Parse generate-batch stdout.""" + result = GenerateResult(success=False) + + for line in stdout.splitlines(): + line = line.strip() + + m = re.match(r"^Job file moved to: (.+)$", line) + if m: + result.job_moved_to = m.group(1).strip() + result.success = True + continue + + return result + + +def run_ingest( + xlsx_path: str, + keyword: str, + money_site_url: str, + blm_dir: str, + timeout: int = 1800, + branded_plus_ratio: str = "", + custom_anchors: str = "", + cli_flags: str = "", +) -> IngestResult: + """Run ingest-cora and return parsed result.""" + args = build_ingest_args( + xlsx_path=xlsx_path, + project_name=keyword, + money_site_url=money_site_url, + branded_plus_ratio=branded_plus_ratio, + custom_anchors=custom_anchors, + cli_flags=cli_flags, + ) + + try: + proc = _run_blm(args, blm_dir, timeout=timeout) + except subprocess.TimeoutExpired: + return IngestResult( + success=False, + error="ingest-cora timed out after %d seconds" % timeout, + ) + except FileNotFoundError as e: + return IngestResult(success=False, error=str(e)) + + if proc.returncode != 0: + return IngestResult( + success=False, + error="ingest-cora failed (exit code %d).\nstdout: %s\nstderr: %s" + % (proc.returncode, proc.stdout[-500:], proc.stderr[-500:]), + ) + + parsed = parse_ingest_output(proc.stdout) + if not parsed.job_file: + return IngestResult( + success=False, + error="ingest-cora produced no job file.\nstdout: %s" % proc.stdout[-500:], + ) + + return parsed + + +def run_generate( + job_file: str, + blm_dir: str, + timeout: int = 1800, +) -> GenerateResult: + """Run generate-batch and return parsed result.""" + job_path = Path(blm_dir) / job_file if not Path(job_file).is_absolute() else Path(job_file) + args = ["generate-batch", "-j", str(job_path), "--continue-on-error"] + + try: + proc = _run_blm(args, 
blm_dir, timeout=timeout) + except subprocess.TimeoutExpired: + return GenerateResult( + success=False, + error="generate-batch timed out after %d seconds" % timeout, + ) + except FileNotFoundError as e: + return GenerateResult(success=False, error=str(e)) + + if proc.returncode != 0: + return GenerateResult( + success=False, + error="generate-batch failed (exit code %d).\nstdout: %s\nstderr: %s" + % (proc.returncode, proc.stdout[-500:], proc.stderr[-500:]), + ) + + return parse_generate_output(proc.stdout) diff --git a/clickup_runner/claude_runner.py b/clickup_runner/claude_runner.py index 9bb9fe5..a7d9a7c 100644 --- a/clickup_runner/claude_runner.py +++ b/clickup_runner/claude_runner.py @@ -73,6 +73,27 @@ def build_prompt( if task.url: ctx_lines.append("ClickUp Task: %s" % task.url) + # Task-type-specific fields + keyword = task.get_field_value("Keyword") + if keyword: + ctx_lines.append("Keyword: %s" % keyword) + + cli_flags = task.get_field_value("CLIFlags") + if cli_flags: + ctx_lines.append("CLIFlags: %s" % cli_flags) + + bp_ratio = task.get_field_value("BrandedPlusRatio") + if bp_ratio: + ctx_lines.append("BrandedPlusRatio: %s" % bp_ratio) + + custom_anchors = task.get_field_value("CustomAnchors") + if custom_anchors: + ctx_lines.append("CustomAnchors: %s" % custom_anchors) + + pr_topic = task.get_field_value("PR Topic") + if pr_topic: + ctx_lines.append("PR Topic: %s" % pr_topic) + if task.description: ctx_lines.append("") ctx_lines.append("### Description") diff --git a/clickup_runner/config.py b/clickup_runner/config.py index d1e8223..22c6362 100644 --- a/clickup_runner/config.py +++ b/clickup_runner/config.py @@ -40,6 +40,13 @@ class AutoCoraConfig: poll_interval_seconds: int = 120 +@dataclass +class BLMConfig: + blm_dir: str = "E:/dev/Big-Link-Man" + cora_inbox: str = "//PennQnap1/SHARE1/cora-inbox" + timeout_seconds: int = 1800 # 30 minutes + + @dataclass class NASConfig: generated_dir: str = "//PennQnap1/SHARE1/generated" @@ -65,6 +72,7 @@ class 
NtfyConfig: class Config: clickup: ClickUpConfig = field(default_factory=ClickUpConfig) autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig) + blm: BLMConfig = field(default_factory=BLMConfig) nas: NASConfig = field(default_factory=NASConfig) runner: RunnerConfig = field(default_factory=RunnerConfig) ntfy: NtfyConfig = field(default_factory=NtfyConfig) @@ -93,7 +101,7 @@ def load_config(yaml_path: Path | None = None) -> Config: with open(yaml_path) as f: data = yaml.safe_load(f) or {} - for section_name in ("clickup", "autocora", "nas", "runner", "ntfy"): + for section_name in ("clickup", "autocora", "blm", "nas", "runner", "ntfy"): if section_name in data and isinstance(data[section_name], dict): _apply_section(getattr(cfg, section_name), data[section_name]) diff --git a/clickup_runner/skill_map.py b/clickup_runner/skill_map.py index d3a76ef..2117d65 100644 --- a/clickup_runner/skill_map.py +++ b/clickup_runner/skill_map.py @@ -96,11 +96,9 @@ SKILL_MAP: dict[str, dict[str, SkillRoute]] = { next_status="review", ), "build": SkillRoute( - skill_file="linkbuilding.md", + handler="blm", next_stage="final", next_status="review", - tools=_LINK_TOOLS, - max_turns=15, ), }, } diff --git a/skills/runner_linkbuilding.md b/skills/runner_linkbuilding.md new file mode 100644 index 0000000..38f23d6 --- /dev/null +++ b/skills/runner_linkbuilding.md @@ -0,0 +1,69 @@ +# Link Building -- Build Stage + +Run the Big-Link-Man (BLM) CLI to ingest a Cora report and generate tiered backlink content. + +## What You Have + +- A Cora `.xlsx` report on the NAS at `//PennQnap1/SHARE1/Cora72-for-macro/` +- The task's keyword, target URL, and optional CLI flags in the Task Context below + +## Steps + +### 1. Find the Cora .xlsx File + +Search `//PennQnap1/SHARE1/Cora72-for-macro/` for a file matching the task keyword. The filename is a slugified version of the keyword (e.g., "gearbox oil sight glass" -> `gearbox_oil_sight_glass.xlsx`). Use glob/ls to find it. 
If multiple matches exist, pick the most recent. + +### 2. Run ingest-cora + +Run the BLM CLI using its own venv Python (NOT the system Python or CheddahBot's venv): + +```bash +E:/dev/Big-Link-Man/.venv/Scripts/python.exe main.py ingest-cora \ + -f "<xlsx_path>" \ + -n "<keyword>" \ + -m "<target_url>" \ + <cli_flags> +``` + +- Working directory: `E:/dev/Big-Link-Man` +- Always pass `-m` with the target URL (prevents interactive prompts) +- If BrandedPlusRatio is provided and not 0.7, add `-bp <ratio>` +- If CustomAnchors is provided, add `-a "<anchors>"` +- If CLIFlags is provided, append them as-is (e.g. `--tier1-count 6`) +- BLM credentials are in env vars `BLM_USERNAME` and `BLM_PASSWORD` -- pass them as `-u` and `-p` + +**Expected output:** Look for lines like: +- `Success: Project 'name' created (ID: 42)` +- `Job file created: jobs/some-file.json` + +If exit code is non-zero, stop and report the error including stdout and stderr. + +### 3. Run generate-batch + +Using the job file path from step 2: + +```bash +E:/dev/Big-Link-Man/.venv/Scripts/python.exe main.py generate-batch \ + -j "<job_file>" \ + --continue-on-error +``` + +- Working directory: `E:/dev/Big-Link-Man` +- Always include `--continue-on-error` +- Pass `-u` and `-p` credentials from env vars +- This step can take several minutes + +If exit code is non-zero, stop and report the error. + +### 4. Collect Output + +After generate-batch completes, the generated content files are in Big-Link-Man's output directory. Copy the relevant output files to the current working directory so they get uploaded to ClickUp. + +Look in `E:/dev/Big-Link-Man/output/` for the project folder matching the keyword. Copy all `.md`, `.txt`, and `.html` files from there to the current working directory. 
+ +## Important + +- NEVER use `uv run` or CheddahBot's Python -- always use BLM's venv at `E:/dev/Big-Link-Man/.venv/Scripts/python.exe` +- If the venv doesn't exist, stop and report the error +- Do not modify any BLM source code +- Do not create subdirectories in the working directory diff --git a/tests/test_clickup_runner/test_skill_map.py b/tests/test_clickup_runner/test_skill_map.py index 87d8a4d..5da674c 100644 --- a/tests/test_clickup_runner/test_skill_map.py +++ b/tests/test_clickup_runner/test_skill_map.py @@ -64,8 +64,7 @@ class TestGetRoute: def test_link_building_build(self): route = get_route("Link Building", "build") assert route is not None - assert route.handler == "claude" - assert route.skill_file == "linkbuilding.md" + assert route.handler == "blm" def test_unknown_task_type_returns_none(self): assert get_route("Banana Farming", "draft") is None