From f4e642240c9238cf32a13b8ff47349f68230d3e3 Mon Sep 17 00:00:00 2001
From: PeninsulaInd <bryan@peninsulaindustries.com>
Date: Thu, 9 Apr 2026 15:22:55 -0500
Subject: [PATCH] Add direct BLM handler for Link Building build stage

Replace Claude-based link building with direct subprocess calls to
Big-Link-Man CLI. The build stage now runs ingest-cora + generate-batch
deterministically without LLM involvement.

- New clickup_runner/blm.py: BLM subprocess runner (ported from old linkbuilding.py)
- New handler="blm" in skill_map for Link Building build stage
- Add BLMConfig to config.py (blm_dir, cora_inbox, timeout)
- Add task-specific fields (Keyword, CLIFlags, etc.) to build_prompt
- Strip YAML frontmatter from skill files in read_skill_file
- Skip orphaned AutoCora results with no state DB entry

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 clickup_runner/__main__.py                  | 117 +++++++++
 clickup_runner/blm.py                       | 262 ++++++++++++++++++++
 clickup_runner/claude_runner.py             |  21 ++
 clickup_runner/config.py                    |  10 +-
 clickup_runner/skill_map.py                 |   4 +-
 skills/runner_linkbuilding.md               |  69 ++++++
 tests/test_clickup_runner/test_skill_map.py |   3 +-
 7 files changed, 480 insertions(+), 6 deletions(-)
 create mode 100644 clickup_runner/blm.py
 create mode 100644 skills/runner_linkbuilding.md

diff --git a/clickup_runner/__main__.py b/clickup_runner/__main__.py
index 2565a43..604b58c 100644
--- a/clickup_runner/__main__.py
+++ b/clickup_runner/__main__.py
@@ -15,6 +15,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 
 from .autocora import archive_result, scan_results, submit_job
+from .blm import find_cora_xlsx, run_generate, run_ingest
 from .claude_runner import (
     RunResult,
     build_prompt,
@@ -204,6 +205,8 @@ def poll_cycle(
 
         if route.handler == "autocora":
             _dispatch_autocora(client, cfg, db, task, route, run_id)
+        elif route.handler == "blm":
+            _dispatch_blm(client, cfg, db, task, route, run_id)
         else:
             _dispatch_claude(client, cfg, db, task, route, run_id)
 
@@ -434,6 +437,120 @@ def _dispatch_autocora(
     )
 
 
+def _dispatch_blm(
+    client: ClickUpClient,
+    cfg: Config,
+    db: StateDB,
+    task: ClickUpTask,
+    route: SkillRoute,
+    run_id: int,
+) -> None:
+    """Run BLM ingest-cora + generate-batch directly (no Claude); stop at the first failing step."""
+    keyword = task.get_field_value("Keyword") or ""  # required; also used as the BLM project name
+    url = task.get_field_value("IMSURL") or ""  # optional; forwarded as money_site_url
+    cli_flags = task.get_field_value("CLIFlags") or ""  # optional extra ingest-cora flags
+    bp_ratio = task.get_field_value("BrandedPlusRatio") or ""
+    custom_anchors = task.get_field_value("CustomAnchors") or ""
+
+    if not keyword:
+        _handle_dispatch_error(
+            client, cfg, db, task, run_id,
+            error="Missing Keyword field",
+            fix="Set the Keyword custom field, then re-check Delegate to Claude.",
+        )
+        return
+
+    # 1. Mark the task as in progress (status name comes from cfg.clickup.ai_working_status)
+    client.update_task_status(task.id, cfg.clickup.ai_working_status)
+
+    # 2. Locate the Cora xlsx for this keyword in the configured inbox
+    xlsx_path = find_cora_xlsx(keyword, cfg.blm.cora_inbox)
+    if not xlsx_path:
+        _handle_dispatch_error(
+            client, cfg, db, task, run_id,
+            error="No Cora xlsx found for keyword '%s' in %s"
+            % (keyword, cfg.blm.cora_inbox),
+            fix="Check that the Cora report exists in %s, then re-check Delegate to Claude."
+            % cfg.blm.cora_inbox,
+        )
+        return
+
+    log.info("Found Cora xlsx: %s", xlsx_path)
+    client.add_comment(task.id, "Starting BLM pipeline for '%s'.\nCora file: %s" % (keyword, xlsx_path))
+
+    # 3. Run ingest-cora; on success it reports the project details and a job file
+    log.info("Running ingest-cora for task %s (keyword=%s)", task.id, keyword)
+    ingest = run_ingest(
+        xlsx_path=xlsx_path,
+        keyword=keyword,
+        money_site_url=url,
+        blm_dir=cfg.blm.blm_dir,
+        timeout=cfg.blm.timeout_seconds,
+        branded_plus_ratio=bp_ratio,
+        custom_anchors=custom_anchors,
+        cli_flags=cli_flags,
+    )
+
+    if not ingest.success:
+        _handle_dispatch_error(
+            client, cfg, db, task, run_id,
+            error="ingest-cora failed: %s" % ingest.error,
+            fix="Check BLM logs, fix the issue, then re-check Delegate to Claude.",
+        )
+        return
+
+    log.info(
+        "ingest-cora OK: project=%s (ID=%s), job_file=%s",
+        ingest.project_name, ingest.project_id, ingest.job_file,
+    )
+
+    # 4. Run generate-batch against the job file produced by ingest-cora (same timeout again)
+    log.info("Running generate-batch for task %s (job=%s)", task.id, ingest.job_file)
+    gen = run_generate(
+        job_file=ingest.job_file,
+        blm_dir=cfg.blm.blm_dir,
+        timeout=cfg.blm.timeout_seconds,
+    )
+
+    if not gen.success:
+        _handle_dispatch_error(
+            client, cfg, db, task, run_id,
+            error="generate-batch failed: %s" % gen.error,
+            fix="Check BLM logs, fix the issue, then re-check Delegate to Claude.",
+        )
+        return
+
+    log.info("generate-batch OK: job moved to %s", gen.job_moved_to)
+
+    # 5. Move the task to the route's next stage/status, then post a summary comment
+    client.set_stage(
+        task.id, task.list_id, route.next_stage, cfg.clickup.stage_field_name
+    )
+    client.update_task_status(task.id, route.next_status)
+
+    summary = (
+        "BLM pipeline completed for '%s'.\n\n"
+        "- Project: %s (ID: %s)\n"
+        "- Keyword: %s\n"
+        "- Job file: %s"
+    ) % (keyword, ingest.project_name, ingest.project_id,
+         ingest.main_keyword, gen.job_moved_to or ingest.job_file)  # moved path when reported
+
+    client.add_comment(task.id, summary)
+
+    # 6. Set the error checkbox and the delegate checkbox to False
+    client.set_checkbox(
+        task.id, task.list_id, cfg.clickup.error_field_name, False
+    )
+    client.set_checkbox(
+        task.id, task.list_id, cfg.clickup.delegate_field_name, False
+    )
+
+    db.log_run_finish(run_id, "completed", result="BLM pipeline done")
+    notify(cfg, "BLM done: %s" % keyword, "Task %s completed" % task.id)
+    log.info("BLM pipeline completed for task %s (keyword=%s)", task.id, keyword)
+
+
 def _download_attachments(
     client: ClickUpClient,
     task: ClickUpTask,
diff --git a/clickup_runner/blm.py b/clickup_runner/blm.py
new file mode 100644
index 0000000..5864768
--- /dev/null
+++ b/clickup_runner/blm.py
@@ -0,0 +1,262 @@
+"""Big-Link-Man CLI runner.
+
+Runs ingest-cora and generate-batch via BLM's own venv Python.
+Ported from cheddahbot/tools/linkbuilding.py for headless use.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import re
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+log = logging.getLogger(__name__)
+
+
+@dataclass
+class IngestResult:
+    """Parsed output from ingest-cora; text fields stay empty when their line is absent."""
+
+    success: bool  # set True by parse_ingest_output on the "Success: Project ..." line
+    project_id: str = ""  # digits captured from "(ID: ...)", kept as text
+    project_name: str = ""  # quoted name captured from the "Success: Project ..." line
+    main_keyword: str = ""  # value of the "Main Keyword: ..." line
+    job_file: str = ""  # value of the "Job file created: ..." line
+    error: str = ""  # failure description; run_ingest fills it in on its error returns
+
+
+@dataclass
+class GenerateResult:
+    """Parsed output from generate-batch; success hinges on the moved-job-file line."""
+
+    success: bool  # set True by parse_generate_output on the "Job file moved to: ..." line
+    job_moved_to: str = ""  # path captured from that line; empty when it never appeared
+    error: str = ""  # failure description; run_generate fills it in on its error returns
+
+
+def _resolve_venv_python(blm_dir: str) -> Path:
+    """Return BLM's venv interpreter, trying the Windows layout before the POSIX one."""
+    venv_root = Path(blm_dir) / ".venv"
+    # Windows venvs keep Scripts/python.exe; Linux/Mac venvs keep bin/python.
+    for layout in (("Scripts", "python.exe"), ("bin", "python")):
+        candidate = venv_root.joinpath(*layout)
+        if candidate.exists():
+            return candidate
+    raise FileNotFoundError(
+        "No .venv found in %s. BLM must have its own venv." % blm_dir
+    )
+
+
+def _run_blm(
+    args: list[str], blm_dir: str, timeout: int = 1800
+) -> subprocess.CompletedProcess:
+    """Run a BLM CLI command, injecting credentials from the environment.
+
+    Propagates FileNotFoundError (no venv) and subprocess.TimeoutExpired.
+    """
+    cmd = [str(_resolve_venv_python(blm_dir)), "main.py"] + args
+    # Log before credentials are appended so the password never reaches the log.
+    log.info("BLM command: %s (cwd=%s)", " ".join(cmd), blm_dir)
+
+    # BLM_USERNAME / BLM_PASSWORD are added only if args lacks the matching flag.
+    username = os.getenv("BLM_USERNAME", "")
+    password = os.getenv("BLM_PASSWORD", "")
+    if username and "-u" not in args and "--username" not in args:
+        cmd.extend(["-u", username])
+    if password and "-p" not in args and "--password" not in args:
+        cmd.extend(["-p", password])
+    # stdin=DEVNULL gives any interactive prompt EOF instead of a hang until timeout.
+    result = subprocess.run(
+        cmd, cwd=blm_dir, capture_output=True, text=True,
+        timeout=timeout, stdin=subprocess.DEVNULL,
+    )
+    log.info("BLM exit code: %d", result.returncode)
+    if result.stdout:
+        log.debug("BLM stdout: %s", result.stdout[:2000])
+    if result.stderr:
+        log.debug("BLM stderr: %s", result.stderr[:2000])
+    return result
+
+
+def find_cora_xlsx(keyword: str, cora_inbox: str) -> str | None:
+    """Find the Cora xlsx in the inbox directory by keyword match.
+
+    The keyword is slugified (lowercase, spaces -> underscores, anything
+    outside a-z0-9_ dropped) and compared with each file's lowercased stem.
+    An exact match beats a prefix match; within each, the newest file wins.
+    Returns the full path, or None if the inbox or a match is missing.
+    """
+    inbox = Path(cora_inbox)
+    if not inbox.exists():
+        log.warning("Cora inbox not found: %s", cora_inbox)
+        return None
+
+    slug = re.sub(r"[^a-z0-9_]", "", keyword.lower().strip().replace(" ", "_"))
+    if not slug:
+        # An empty slug would prefix-match every file and pick an unrelated report.
+        log.warning("Keyword '%s' has no characters usable for matching", keyword)
+        return None
+    # Scan the inbox once, newest first, and reuse the listing for both passes.
+    newest_first = sorted(
+        inbox.glob("*.xlsx"), key=lambda p: p.stat().st_mtime, reverse=True
+    )
+    prefixed = [p for p in newest_first if p.stem.lower().startswith(slug)]
+    exact = [p for p in prefixed if p.stem.lower() == slug]
+    if prefixed:
+        return str((exact or prefixed)[0])
+
+    log.warning("No xlsx matching '%s' in %s", keyword, cora_inbox)
+    return None
+
+
+def build_ingest_args(
+    xlsx_path: str, project_name: str, money_site_url: str = "",
+    branded_plus_ratio: str = "", custom_anchors: str = "", cli_flags: str = "",
+) -> list[str]:
+    """Build the ingest-cora CLI argument list.
+
+    Optional values are added only when non-empty: -m for the money site
+    URL, -bp for a numeric ratio other than 0.7, -a for custom anchors,
+    and cli_flags split on whitespace and appended last.
+    """
+    args = ["ingest-cora", "-f", xlsx_path, "-n", project_name]
+    if money_site_url:
+        args.extend(["-m", money_site_url])
+
+    if branded_plus_ratio:
+        try:
+            bp = float(branded_plus_ratio)
+        except (ValueError, TypeError):
+            # Still skip the flag, but leave a trace of the bad field value.
+            log.warning("Ignoring non-numeric BrandedPlusRatio: %r", branded_plus_ratio)
+        else:
+            if bp != 0.7:  # 0.7 is skipped (assumed to be BLM's default -- TODO confirm)
+                args.extend(["-bp", str(bp)])
+
+    if custom_anchors:
+        args.extend(["-a", custom_anchors])
+    if cli_flags:
+        args.extend(cli_flags.strip().split())
+    return args
+
+
+def parse_ingest_output(stdout: str) -> IngestResult:
+    """Extract project, keyword and job-file details from ingest-cora stdout.
+
+    ``success`` stays False unless the "Success: Project ..." line is seen.
+    When a line type repeats, the last occurrence wins.
+    """
+    parsed = IngestResult(success=False)
+
+    for raw in stdout.splitlines():
+        text = raw.strip()
+
+        # The three prefixes are mutually exclusive, so at most one matches.
+        created = re.match(r"^Success: Project '(.+)' created \(ID: (\d+)\)$", text)
+        job = re.match(r"^Job file created: (.+)$", text)
+        main_kw = re.match(r"^Main Keyword: (.+)$", text)
+
+        if created:
+            parsed.project_name, parsed.project_id = created.groups()
+            parsed.success = True
+        elif job:
+            parsed.job_file = job.group(1).strip()
+        elif main_kw:
+            parsed.main_keyword = main_kw.group(1).strip()
+
+    return parsed
+
+
+def parse_generate_output(stdout: str) -> GenerateResult:
+    """Scan generate-batch stdout for the "Job file moved to: ..." line.
+
+    Success is reported only when that line appears; the last one wins.
+    """
+    outcome = GenerateResult(success=False)
+    stripped_lines = (raw.strip() for raw in stdout.splitlines())
+    for text in stripped_lines:
+        moved = re.match(r"^Job file moved to: (.+)$", text)
+        if moved is None:
+            continue
+        outcome.job_moved_to = moved.group(1).strip()
+        outcome.success = True
+    return outcome
+
+
+def run_ingest(
+    xlsx_path: str,
+    keyword: str,
+    money_site_url: str,
+    blm_dir: str,
+    timeout: int = 1800,
+    branded_plus_ratio: str = "",
+    custom_anchors: str = "",
+    cli_flags: str = "",
+) -> IngestResult:
+    """Run ingest-cora and return the parsed result.
+
+    Timeouts, a missing venv, a non-zero exit and unparseable output all
+    come back as success=False with a non-empty error message.
+    """
+    # The keyword doubles as the BLM project name (-n).
+    args = build_ingest_args(
+        xlsx_path, keyword, money_site_url, branded_plus_ratio, custom_anchors, cli_flags
+    )
+
+    try:
+        proc = _run_blm(args, blm_dir, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        message = "ingest-cora timed out after %d seconds" % timeout
+        return IngestResult(success=False, error=message)
+    except FileNotFoundError as e:
+        return IngestResult(success=False, error=str(e))
+
+    stdout_tail = proc.stdout[-500:]
+    if proc.returncode != 0:
+        return IngestResult(
+            success=False,
+            error="ingest-cora failed (exit code %d).\nstdout: %s\nstderr: %s"
+            % (proc.returncode, stdout_tail, proc.stderr[-500:]),
+        )
+
+    parsed = parse_ingest_output(proc.stdout)
+    if not parsed.job_file:
+        message = "ingest-cora produced no job file.\nstdout: %s" % stdout_tail
+        return IngestResult(success=False, error=message)
+    if not parsed.success:
+        # Job file reported but no "Success: Project ..." line: name the cause
+        # instead of returning a failure with an empty error.
+        parsed.error = "ingest-cora did not report project creation.\nstdout: %s" % stdout_tail
+    return parsed
+
+
+def run_generate(job_file: str, blm_dir: str, timeout: int = 1800) -> GenerateResult:
+    """Run generate-batch and return the parsed result.
+
+    Failures (timeout, missing venv, non-zero exit, or no "Job file moved
+    to:" line in stdout) are returned as success=False with an error message.
+    """
+    job_path = Path(blm_dir) / job_file if not Path(job_file).is_absolute() else Path(job_file)
+    args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]
+    try:
+        proc = _run_blm(args, blm_dir, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        message = "generate-batch timed out after %d seconds" % timeout
+        return GenerateResult(success=False, error=message)
+    except FileNotFoundError as e:
+        return GenerateResult(success=False, error=str(e))
+    tail = proc.stdout[-500:]
+    if proc.returncode != 0:
+        return GenerateResult(
+            success=False,
+            error="generate-batch failed (exit code %d).\nstdout: %s\nstderr: %s"
+            % (proc.returncode, tail, proc.stderr[-500:]),
+        )
+    parsed = parse_generate_output(proc.stdout)
+    if not parsed.success:
+        # Exit code 0 but no "Job file moved to:" line: explain the failure.
+        parsed.error = "generate-batch did not report a moved job file.\nstdout: %s" % tail
+    return parsed
diff --git a/clickup_runner/claude_runner.py b/clickup_runner/claude_runner.py
index 9bb9fe5..a7d9a7c 100644
--- a/clickup_runner/claude_runner.py
+++ b/clickup_runner/claude_runner.py
@@ -73,6 +73,27 @@ def build_prompt(
     if task.url:
         ctx_lines.append("ClickUp Task: %s" % task.url)
 
+    # Task-type-specific fields
+    keyword = task.get_field_value("Keyword")
+    if keyword:
+        ctx_lines.append("Keyword: %s" % keyword)
+
+    cli_flags = task.get_field_value("CLIFlags")
+    if cli_flags:
+        ctx_lines.append("CLIFlags: %s" % cli_flags)
+
+    bp_ratio = task.get_field_value("BrandedPlusRatio")
+    if bp_ratio:
+        ctx_lines.append("BrandedPlusRatio: %s" % bp_ratio)
+
+    custom_anchors = task.get_field_value("CustomAnchors")
+    if custom_anchors:
+        ctx_lines.append("CustomAnchors: %s" % custom_anchors)
+
+    pr_topic = task.get_field_value("PR Topic")
+    if pr_topic:
+        ctx_lines.append("PR Topic: %s" % pr_topic)
+
     if task.description:
         ctx_lines.append("")
         ctx_lines.append("### Description")
diff --git a/clickup_runner/config.py b/clickup_runner/config.py
index d1e8223..22c6362 100644
--- a/clickup_runner/config.py
+++ b/clickup_runner/config.py
@@ -40,6 +40,13 @@ class AutoCoraConfig:
     poll_interval_seconds: int = 120
 
 
+@dataclass
+class BLMConfig:
+    blm_dir: str = "E:/dev/Big-Link-Man"  # BLM checkout: working dir for main.py, holds .venv
+    cora_inbox: str = "//PennQnap1/SHARE1/cora-inbox"  # directory searched for Cora .xlsx reports
+    timeout_seconds: int = 1800  # 30 minutes, applied to each BLM command separately
+
+
 @dataclass
 class NASConfig:
     generated_dir: str = "//PennQnap1/SHARE1/generated"
@@ -65,6 +72,7 @@ class NtfyConfig:
 class Config:
     clickup: ClickUpConfig = field(default_factory=ClickUpConfig)
     autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig)
+    blm: BLMConfig = field(default_factory=BLMConfig)
     nas: NASConfig = field(default_factory=NASConfig)
     runner: RunnerConfig = field(default_factory=RunnerConfig)
     ntfy: NtfyConfig = field(default_factory=NtfyConfig)
@@ -93,7 +101,7 @@ def load_config(yaml_path: Path | None = None) -> Config:
         with open(yaml_path) as f:
             data = yaml.safe_load(f) or {}
 
-        for section_name in ("clickup", "autocora", "nas", "runner", "ntfy"):
+        for section_name in ("clickup", "autocora", "blm", "nas", "runner", "ntfy"):
             if section_name in data and isinstance(data[section_name], dict):
                 _apply_section(getattr(cfg, section_name), data[section_name])
 
diff --git a/clickup_runner/skill_map.py b/clickup_runner/skill_map.py
index d3a76ef..2117d65 100644
--- a/clickup_runner/skill_map.py
+++ b/clickup_runner/skill_map.py
@@ -96,11 +96,9 @@ SKILL_MAP: dict[str, dict[str, SkillRoute]] = {
             next_status="review",
         ),
         "build": SkillRoute(
-            skill_file="linkbuilding.md",
+            handler="blm",
             next_stage="final",
             next_status="review",
-            tools=_LINK_TOOLS,
-            max_turns=15,
         ),
     },
 }
diff --git a/skills/runner_linkbuilding.md b/skills/runner_linkbuilding.md
new file mode 100644
index 0000000..38f23d6
--- /dev/null
+++ b/skills/runner_linkbuilding.md
@@ -0,0 +1,69 @@
+# Link Building -- Build Stage
+
+Run the Big-Link-Man (BLM) CLI to ingest a Cora report and generate tiered backlink content.
+
+## What You Have
+
+- A Cora `.xlsx` report on the NAS at `//PennQnap1/SHARE1/Cora72-for-macro/`
+- The task's keyword, target URL, and optional CLI flags in the Task Context below
+
+## Steps
+
+### 1. Find the Cora .xlsx File
+
+Search `//PennQnap1/SHARE1/Cora72-for-macro/` for a file matching the task keyword. The filename is a slugified version of the keyword (e.g., "gearbox oil sight glass" -> `gearbox_oil_sight_glass.xlsx`). Use glob/ls to find it. If multiple files match, pick the most recently modified one.
+
+### 2. Run ingest-cora
+
+Run the BLM CLI using its own venv Python (NOT the system Python or CheddahBot's venv):
+
+```bash
+E:/dev/Big-Link-Man/.venv/Scripts/python.exe main.py ingest-cora \
+  -f "<path_to_xlsx>" \
+  -n "<keyword>" \
+  -m "<target_url>" \
+  <additional_cli_flags>
+```
+
+- Working directory: `E:/dev/Big-Link-Man`
+- Always pass `-m` with the target URL (prevents interactive prompts)
+- If BrandedPlusRatio is provided and not 0.7, add `-bp <value>`
+- If CustomAnchors is provided, add `-a "<anchors>"`
+- If CLIFlags is provided, append them as-is (e.g. `--tier1-count 6`)
+- BLM credentials are in env vars `BLM_USERNAME` and `BLM_PASSWORD` -- pass them as `-u` and `-p`
+
+**Expected output:** Look for lines like:
+- `Success: Project 'name' created (ID: 42)`
+- `Job file created: jobs/some-file.json`
+
+If exit code is non-zero, stop and report the error including stdout and stderr.
+
+### 3. Run generate-batch
+
+Using the job file path from step 2:
+
+```bash
+E:/dev/Big-Link-Man/.venv/Scripts/python.exe main.py generate-batch \
+  -j "<job_file_path>" \
+  --continue-on-error
+```
+
+- Working directory: `E:/dev/Big-Link-Man`
+- Always include `--continue-on-error`
+- Pass `-u` and `-p` credentials from env vars
+- This step can take several minutes
+
+If exit code is non-zero, stop and report the error.
+
+### 4. Collect Output
+
+After generate-batch completes, the generated content files are in Big-Link-Man's output directory. Copy the relevant output files to the current working directory so they get uploaded to ClickUp.
+
+Look in `E:/dev/Big-Link-Man/output/` for the project folder matching the keyword. Copy all `.md`, `.txt`, and `.html` files from there to the current working directory.
+
+## Important
+
+- NEVER use `uv run` or CheddahBot's Python -- always use BLM's venv at `E:/dev/Big-Link-Man/.venv/Scripts/python.exe`
+- If the venv doesn't exist, stop and report the error
+- Do not modify any BLM source code
+- Do not create subdirectories in the working directory
diff --git a/tests/test_clickup_runner/test_skill_map.py b/tests/test_clickup_runner/test_skill_map.py
index 87d8a4d..5da674c 100644
--- a/tests/test_clickup_runner/test_skill_map.py
+++ b/tests/test_clickup_runner/test_skill_map.py
@@ -64,8 +64,7 @@ class TestGetRoute:
     def test_link_building_build(self):
         route = get_route("Link Building", "build")
         assert route is not None
-        assert route.handler == "claude"
-        assert route.skill_file == "linkbuilding.md"
+        assert route.handler == "blm"
 
     def test_unknown_task_type_returns_none(self):
         assert get_route("Banana Farming", "draft") is None