"""Link building pipeline tools.

Orchestrates the Big-Link-Man CLI tool for automated link building.

Primary workflow: ingest CORA .xlsx → generate content batch.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
import re
|
|
import subprocess
|
|
from collections.abc import Callable
|
|
from pathlib import Path
|
|
|
|
from . import tool
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Private helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _get_blm_dir(ctx: dict | None) -> str:
|
|
"""Resolve the Big-Link-Man directory from config or env."""
|
|
if ctx and "config" in ctx:
|
|
return ctx["config"].link_building.blm_dir
|
|
return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man")
|
|
|
|
|
|
def _get_blm_timeout(ctx: dict | None) -> int:
|
|
"""Get BLM subprocess timeout from config or default (1800s / 30 min)."""
|
|
if ctx and "config" in ctx:
|
|
return ctx["config"].timeouts.blm
|
|
return 1800
|
|
|
|
|
|
def _run_blm_command(
|
|
args: list[str], blm_dir: str, timeout: int = 1800
|
|
) -> subprocess.CompletedProcess:
|
|
"""Run a Big-Link-Man CLI command via subprocess.
|
|
|
|
Always injects -u/-p from BLM_USERNAME/BLM_PASSWORD env vars.
|
|
"""
|
|
# Use BLM's own venv Python so its dependencies are available
|
|
venv_python = Path(blm_dir) / ".venv" / "Scripts" / "python.exe"
|
|
if not venv_python.exists():
|
|
# Fallback for Linux/Mac
|
|
venv_python = Path(blm_dir) / ".venv" / "bin" / "python"
|
|
if not venv_python.exists():
|
|
raise FileNotFoundError(
|
|
f"No .venv found in {blm_dir}. External tools must have their own venv."
|
|
)
|
|
cmd = [str(venv_python), "main.py", *args]
|
|
|
|
# Inject credentials from env vars
|
|
username = os.getenv("BLM_USERNAME", "")
|
|
password = os.getenv("BLM_PASSWORD", "")
|
|
if username and "-u" not in args and "--username" not in args:
|
|
cmd.extend(["-u", username])
|
|
if password and "-p" not in args and "--password" not in args:
|
|
cmd.extend(["-p", password])
|
|
|
|
log.info("Running BLM command: %s (cwd=%s)", " ".join(cmd), blm_dir)
|
|
result = subprocess.run(
|
|
cmd,
|
|
cwd=blm_dir,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=timeout,
|
|
)
|
|
log.info("BLM exit code: %d", result.returncode)
|
|
if result.stdout:
|
|
log.debug("BLM stdout: %s", result.stdout[:1000])
|
|
if result.stderr:
|
|
log.debug("BLM stderr: %s", result.stderr[:1000])
|
|
return result
|
|
|
|
|
|
def _build_ingest_args(
|
|
xlsx_path: str,
|
|
project_name: str,
|
|
money_site_url: str = "",
|
|
branded_plus_ratio: float = 0.7,
|
|
custom_anchors: str = "",
|
|
cli_flags: str = "",
|
|
) -> list[str]:
|
|
"""Construct CLI argument list for ingest-cora command."""
|
|
args = ["ingest-cora", "-f", xlsx_path, "-n", project_name]
|
|
|
|
if money_site_url:
|
|
args.extend(["-m", money_site_url])
|
|
|
|
if branded_plus_ratio and branded_plus_ratio != 0.7:
|
|
args.extend(["-bp", str(branded_plus_ratio)])
|
|
|
|
if custom_anchors:
|
|
args.extend(["-a", custom_anchors])
|
|
|
|
# Parse any additional CLI flags
|
|
if cli_flags:
|
|
extra = cli_flags.strip().split()
|
|
args.extend(extra)
|
|
|
|
return args
|
|
|
|
|
|
def _parse_ingest_output(stdout: str) -> dict:
|
|
"""Parse ingest-cora stdout to extract project_id and job_file path.
|
|
|
|
Returns dict with keys: project_id, job_file, project_name, main_keyword
|
|
"""
|
|
result = {
|
|
"project_id": "",
|
|
"job_file": "",
|
|
"project_name": "",
|
|
"main_keyword": "",
|
|
}
|
|
|
|
for line in stdout.splitlines():
|
|
line = line.strip()
|
|
|
|
# Success: Project 'My Project' created (ID: 42)
|
|
m = re.match(r"^Success: Project '(.+)' created \(ID: (\d+)\)$", line)
|
|
if m:
|
|
result["project_name"] = m.group(1)
|
|
result["project_id"] = m.group(2)
|
|
continue
|
|
|
|
# Job file created: jobs/my-project.json
|
|
m = re.match(r"^Job file created: (.+)$", line)
|
|
if m:
|
|
result["job_file"] = m.group(1).strip()
|
|
continue
|
|
|
|
# Main Keyword: precision cnc machining
|
|
m = re.match(r"^Main Keyword: (.+)$", line)
|
|
if m:
|
|
result["main_keyword"] = m.group(1).strip()
|
|
continue
|
|
|
|
return result
|
|
|
|
|
|
def _parse_generate_output(stdout: str) -> dict:
|
|
"""Parse generate-batch stdout to extract completion stats.
|
|
|
|
Returns dict with keys: job_moved_to, success (bool), raw_output
|
|
"""
|
|
result = {
|
|
"job_moved_to": "",
|
|
"success": False,
|
|
"raw_output": stdout,
|
|
}
|
|
|
|
for line in stdout.splitlines():
|
|
line = line.strip()
|
|
|
|
# Job file moved to: jobs/done/my-project.json
|
|
m = re.match(r"^Job file moved to: (.+)$", line)
|
|
if m:
|
|
result["job_moved_to"] = m.group(1).strip()
|
|
result["success"] = True
|
|
continue
|
|
|
|
return result
|
|
|
|
|
|
def _set_status(ctx: dict | None, message: str) -> None:
|
|
"""Log pipeline progress. Previously wrote to KV; now just logs."""
|
|
if message:
|
|
log.info("[LB Pipeline] %s", message)
|
|
|
|
|
|
def _get_clickup_client(ctx: dict | None):
|
|
"""Create a ClickUpClient from tool context, or None if unavailable."""
|
|
if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled:
|
|
return None
|
|
try:
|
|
from ..clickup import ClickUpClient
|
|
|
|
config = ctx["config"]
|
|
return ClickUpClient(
|
|
api_token=config.clickup.api_token,
|
|
workspace_id=config.clickup.workspace_id,
|
|
task_type_field_name=config.clickup.task_type_field_name,
|
|
)
|
|
except Exception as e:
|
|
log.warning("Could not create ClickUp client: %s", e)
|
|
return None
|
|
|
|
|
|
def _sync_clickup(ctx: dict | None, task_id: str, step: str, message: str) -> None:
|
|
"""Post a progress comment to ClickUp."""
|
|
if not task_id or not ctx:
|
|
return
|
|
|
|
cu_client = _get_clickup_client(ctx)
|
|
if cu_client:
|
|
try:
|
|
cu_client.add_comment(task_id, message)
|
|
except Exception as e:
|
|
log.warning("ClickUp comment failed for task %s: %s", task_id, e)
|
|
finally:
|
|
cu_client.close()
|
|
|
|
|
|
def _find_clickup_task(ctx: dict, keyword: str) -> str:
    """Find a ClickUp Link Building task matching the given keyword.

    Looks for "to do" tasks with Work Category == "Link Building" and
    the Keyword custom field fuzzy-matching the keyword param.

    Side effect: the first matching task is moved to the configured
    "automation underway" status before its id is returned.

    Returns task_id if found, else "".
    """
    # Probe that a client can be built at all; also used for the query below.
    cu_client = _get_clickup_client(ctx)
    if not cu_client:
        return ""

    config = ctx.get("config")
    if not config or not config.clickup.space_id:
        return ""

    try:
        tasks = cu_client.get_tasks_from_space(
            config.clickup.space_id,
            statuses=["to do"],
        )
    except Exception as e:
        log.warning("ClickUp query failed in _find_clickup_task: %s", e)
        return ""
    finally:
        # The query client is always closed here, which is why a second
        # client must be created below for the status update.
        cu_client.close()

    keyword_norm = _normalize_for_match(keyword)

    for task in tasks:
        if task.task_type != "Link Building":
            continue

        task_keyword = task.custom_fields.get("Keyword", "")
        if not task_keyword:
            continue

        # No llm_check callback is passed, so _fuzzy_keyword_match reduces
        # to exact equality after normalization.
        if _fuzzy_keyword_match(keyword_norm, _normalize_for_match(str(task_keyword))):
            # Found a match — move to "automation underway"
            task_id = task.id

            # Move to "automation underway"
            cu_client2 = _get_clickup_client(ctx)
            if cu_client2:
                try:
                    cu_client2.update_task_status(task_id, config.clickup.automation_status)
                except Exception as e:
                    # Best-effort: the match is still returned even if the
                    # status update fails.
                    log.warning("Failed to update ClickUp status for %s: %s", task_id, e)
                finally:
                    cu_client2.close()

            log.info("Auto-matched ClickUp task %s for keyword '%s'", task_id, keyword)
            return task_id

    return ""
|
|
|
|
|
|
def _normalize_for_match(text: str) -> str:
|
|
"""Normalize text for fuzzy matching: lowercase, strip non-alnum, collapse spaces."""
|
|
text = text.lower().strip()
|
|
text = re.sub(r"[^a-z0-9\s]", " ", text)
|
|
text = re.sub(r"\s+", " ", text).strip()
|
|
return text
|
|
|
|
|
|
def _fuzzy_keyword_match(a: str, b: str, llm_check: Callable[[str, str], bool] | None = None) -> bool:
|
|
"""Check if two normalized strings match, allowing singular/plural differences.
|
|
|
|
Fast path: exact match after normalization.
|
|
Slow path: ask an LLM if the two keywords are the same aside from plural form.
|
|
Falls back to False if no llm_check is provided and strings differ.
|
|
"""
|
|
if not a or not b:
|
|
return False
|
|
if a == b:
|
|
return True
|
|
if llm_check is None:
|
|
return False
|
|
|
|
# Only call LLM when keywords share most words (possible plural difference).
|
|
words_a = set(a.split())
|
|
words_b = set(b.split())
|
|
shared = words_a & words_b
|
|
total = max(len(words_a), len(words_b))
|
|
if total > 1 and len(shared) < total - 1:
|
|
return False
|
|
|
|
return llm_check(a, b)
|
|
|
|
|
|
def _complete_clickup_task(ctx: dict | None, task_id: str, message: str, status: str = "") -> None:
|
|
"""Mark a ClickUp task as completed."""
|
|
if not task_id or not ctx:
|
|
return
|
|
|
|
config = ctx.get("config")
|
|
skill_map = config.clickup.skill_map if config else {}
|
|
lb_map = skill_map.get("Link Building", {})
|
|
complete_status = status or lb_map.get("complete_status", "complete")
|
|
|
|
cu_client = _get_clickup_client(ctx)
|
|
if cu_client:
|
|
try:
|
|
cu_client.add_comment(task_id, message)
|
|
cu_client.update_task_status(task_id, complete_status)
|
|
except Exception as e:
|
|
log.warning("ClickUp completion failed for %s: %s", task_id, e)
|
|
finally:
|
|
cu_client.close()
|
|
|
|
|
|
def _fail_clickup_task(ctx: dict | None, task_id: str, error_msg: str) -> None:
|
|
"""Mark a ClickUp task as failed."""
|
|
if not task_id or not ctx:
|
|
return
|
|
|
|
config = ctx.get("config")
|
|
error_status = config.clickup.error_status if config else "error"
|
|
|
|
cu_client = _get_clickup_client(ctx)
|
|
if cu_client:
|
|
try:
|
|
cu_client.add_comment(
|
|
task_id,
|
|
f"[FAILED]Link building pipeline failed.\n\nError: {error_msg[:2000]}",
|
|
)
|
|
cu_client.update_task_status(task_id, error_status)
|
|
except Exception as e:
|
|
log.warning("ClickUp failure update failed for %s: %s", task_id, e)
|
|
finally:
|
|
cu_client.close()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public tools
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
@tool(
    "run_link_building",
    description=(
        "Orchestrator for link building pipelines. Reads the LB Method and "
        "routes to the correct pipeline tool (e.g., run_cora_backlinks for "
        "'Cora Backlinks'). Use when a ClickUp task or chat command requests "
        "link building without specifying the exact pipeline."
    ),
    category="linkbuilding",
)
def run_link_building(
    lb_method: str = "",
    xlsx_path: str = "",
    project_name: str = "",
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Route a link building request to the pipeline named by *lb_method*.

    Currently only 'Cora Backlinks' is supported; other values (and an
    empty method) produce an explanatory skip/error message instead of
    raising.
    """
    method = (lb_method or "").strip()

    # Guard: no method set — nothing to dispatch.
    if not method:
        return (
            "Skipped: 'LB Method' field is empty. Each Link Building task must have "
            "an LB Method set (e.g. 'Cora Backlinks') before processing can begin."
        )

    # Guard: unrecognized pipeline name.
    if method != "Cora Backlinks":
        return f"Unknown LB Method: '{method}'. Supported methods: Cora Backlinks"

    # For Cora Backlinks, xlsx_path is required
    if not xlsx_path:
        return (
            "Skipped: Cora Backlinks requires an xlsx_path. "
            "The folder watcher will trigger this pipeline when a .xlsx "
            "file appears in the watch folder. Or provide xlsx_path explicitly."
        )

    return run_cora_backlinks(
        xlsx_path=xlsx_path,
        project_name=project_name,
        money_site_url=money_site_url,
        branded_plus_ratio=branded_plus_ratio,
        custom_anchors=custom_anchors,
        cli_flags=cli_flags,
        ctx=ctx,
    )
|
|
|
|
|
|
@tool(
    "run_cora_backlinks",
    description=(
        "Full Cora Backlinks pipeline: ingests a CORA .xlsx report via "
        "Big-Link-Man's ingest-cora command, then runs generate-batch to "
        "produce content. Requires xlsx_path and project_name. Optionally "
        "syncs with ClickUp task."
    ),
    category="linkbuilding",
)
def run_cora_backlinks(
    xlsx_path: str,
    project_name: str,
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Run the full Cora Backlinks pipeline: ingest-cora → generate-batch.

    Args:
        xlsx_path: Path to the CORA .xlsx report (required).
        project_name: BLM project name; also used to auto-match a ClickUp task.
        money_site_url: IMSURL target site (required).
        branded_plus_ratio: Branded-plus anchor ratio; tolerates string input.
        custom_anchors: Optional custom anchor spec passed through to BLM.
        cli_flags: Extra whitespace-separated flags appended to ingest-cora.
        ctx: Tool context carrying config and optionally a clickup_task_id.

    Returns:
        A markdown progress report; on failure, an "Error: ..." message
        (prefixed by any partial progress already accumulated).

    Side effects: runs BLM subprocesses, logs status, and — when a ClickUp
    task is known — posts progress comments and updates task status
    (failed tasks via _fail_clickup_task, success via _complete_clickup_task).
    """
    # Validate required inputs before touching any external system.
    if not xlsx_path:
        return "Error: xlsx_path is required for Cora Backlinks pipeline."
    if not project_name:
        return "Error: project_name is required for Cora Backlinks pipeline."
    if not money_site_url:
        return (
            "Error: money_site_url (IMSURL) is required for Cora Backlinks pipeline. "
            "Set the IMSURL custom field on the ClickUp task before processing."
        )

    blm_dir = _get_blm_dir(ctx)

    # Check if xlsx file exists
    xlsx = Path(xlsx_path)
    if not xlsx.exists():
        return f"Error: CORA file not found: {xlsx_path}"

    # Try to find matching ClickUp task
    clickup_task_id = ""
    if ctx:
        clickup_task_id = ctx.get("clickup_task_id", "")
        if not clickup_task_id:
            # Auto-lookup from keyword (derive from project name)
            clickup_task_id = _find_clickup_task(ctx, project_name)

    # Accumulates the markdown report returned to the caller.
    output_parts: list[str] = []

    # ── Step 1: ingest-cora ──
    _set_status(ctx, f"Step 1/2: Ingesting CORA report for {project_name}...")
    if clickup_task_id:
        _sync_clickup(ctx, clickup_task_id, "ingest", "[STARTED]Starting Cora Backlinks pipeline...")

    # Convert branded_plus_ratio from string if needed
    try:
        bp_ratio = float(branded_plus_ratio) if branded_plus_ratio else 0.7
    except (ValueError, TypeError):
        bp_ratio = 0.7

    ingest_args = _build_ingest_args(
        xlsx_path=xlsx_path,
        project_name=project_name,
        money_site_url=money_site_url,
        branded_plus_ratio=bp_ratio,
        custom_anchors=custom_anchors,
        cli_flags=cli_flags,
    )

    # One shared timeout governs both BLM subprocess invocations.
    blm_timeout = _get_blm_timeout(ctx)
    try:
        ingest_result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
    except subprocess.TimeoutExpired:
        error = f"ingest-cora timed out after {blm_timeout // 60} minutes"
        _set_status(ctx, "")
        if clickup_task_id:
            _fail_clickup_task(ctx, clickup_task_id, error)
        return f"Error: {error}"

    ingest_parsed = _parse_ingest_output(ingest_result.stdout)

    # A missing job_file means ingest did not actually succeed, even if the
    # exit code was 0.
    if ingest_result.returncode != 0 or not ingest_parsed["job_file"]:
        error = (
            f"ingest-cora failed (exit code {ingest_result.returncode}).\n"
            f"stdout: {ingest_result.stdout[-500:]}\n"
            f"stderr: {ingest_result.stderr[-500:]}"
        )
        _set_status(ctx, "")
        if clickup_task_id:
            _fail_clickup_task(ctx, clickup_task_id, error)
        return f"Error: {error}"

    project_id = ingest_parsed["project_id"]
    job_file = ingest_parsed["job_file"]

    output_parts.append("## Step 1: Ingest CORA Report")
    output_parts.append(f"- Project: {project_name} (ID: {project_id})")
    output_parts.append(f"- Keyword: {ingest_parsed['main_keyword']}")
    output_parts.append(f"- Job file: {job_file}")
    output_parts.append("")

    if clickup_task_id:
        _sync_clickup(
            ctx,
            clickup_task_id,
            "ingest_done",
            f"[DONE]CORA report ingested. Project ID: {project_id}. Job file: {job_file}",
        )

    # ── Step 2: generate-batch ──
    _set_status(ctx, f"Step 2/2: Generating content batch for {project_name}...")

    # Build the job file path (may be relative to BLM dir)
    job_path = Path(blm_dir) / job_file if not Path(job_file).is_absolute() else Path(job_file)

    gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]

    try:
        gen_result = _run_blm_command(gen_args, blm_dir, timeout=blm_timeout)
    except subprocess.TimeoutExpired:
        error = f"generate-batch timed out after {blm_timeout // 60} minutes"
        _set_status(ctx, "")
        if clickup_task_id:
            _fail_clickup_task(ctx, clickup_task_id, error)
        # Step-1 progress is preserved in the error report.
        return "\n".join(output_parts) + f"\n\nError: {error}"

    gen_parsed = _parse_generate_output(gen_result.stdout)

    if gen_result.returncode != 0:
        error = (
            f"generate-batch failed (exit code {gen_result.returncode}).\n"
            f"stdout: {gen_result.stdout[-500:]}\n"
            f"stderr: {gen_result.stderr[-500:]}"
        )
        _set_status(ctx, "")
        if clickup_task_id:
            _fail_clickup_task(ctx, clickup_task_id, error)
        return "\n".join(output_parts) + f"\n\nError: {error}"

    output_parts.append("## Step 2: Generate Content Batch")
    # "Success" requires the "Job file moved to:" marker; exit code 0 without
    # it is reported as the weaker "Completed".
    output_parts.append(f"- Status: {'Success' if gen_parsed['success'] else 'Completed'}")
    if gen_parsed["job_moved_to"]:
        output_parts.append(f"- Job moved to: {gen_parsed['job_moved_to']}")
    output_parts.append("")

    # ── Completion ──
    _set_status(ctx, "")

    if clickup_task_id:
        summary = (
            f"[DONE]Cora Backlinks pipeline completed for {project_name}.\n\n"
            f"Project ID: {project_id}\n"
            f"Keyword: {ingest_parsed['main_keyword']}\n"
            f"Job file: {gen_parsed['job_moved_to'] or job_file}"
        )
        _complete_clickup_task(ctx, clickup_task_id, summary)

        output_parts.append("## ClickUp Sync")
        output_parts.append(f"- Task `{clickup_task_id}` completed")
        # NOTE(review): the actual status comes from the skill_map in
        # _complete_clickup_task and may differ from the literal 'complete'
        # reported here.
        output_parts.append("- Status set to 'complete'")

    return "\n".join(output_parts)
|
|
|
|
|
|
@tool(
    "blm_ingest_cora",
    description=(
        "Standalone CORA ingest: runs Big-Link-Man's ingest-cora command "
        "to parse a CORA .xlsx report and create a project. Returns the "
        "project ID and job file path without running generate-batch."
    ),
    category="linkbuilding",
)
def blm_ingest_cora(
    xlsx_path: str,
    project_name: str,
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Run ingest-cora only and report the new project ID + job file path.

    Unlike run_cora_backlinks, this stops after ingest — no generate-batch
    and no ClickUp sync.
    """
    # Input validation — fail fast with user-facing messages.
    if not xlsx_path:
        return "Error: xlsx_path is required."
    if not project_name:
        return "Error: project_name is required."
    if not Path(xlsx_path).exists():
        return f"Error: CORA file not found: {xlsx_path}"

    blm_dir = _get_blm_dir(ctx)

    # Coerce the ratio defensively; callers sometimes pass a string.
    try:
        bp_ratio = float(branded_plus_ratio) if branded_plus_ratio else 0.7
    except (ValueError, TypeError):
        bp_ratio = 0.7

    cmd_args = _build_ingest_args(
        xlsx_path=xlsx_path,
        project_name=project_name,
        money_site_url=money_site_url,
        branded_plus_ratio=bp_ratio,
        custom_anchors=custom_anchors,
        cli_flags=cli_flags,
    )

    timeout_s = _get_blm_timeout(ctx)
    try:
        proc = _run_blm_command(cmd_args, blm_dir, timeout=timeout_s)
    except subprocess.TimeoutExpired:
        return f"Error: ingest-cora timed out after {timeout_s // 60} minutes."

    parsed = _parse_ingest_output(proc.stdout)

    # Missing job_file means the ingest did not really succeed.
    if proc.returncode != 0 or not parsed["job_file"]:
        return (
            f"Error: ingest-cora failed (exit code {proc.returncode}).\n"
            f"stdout: {proc.stdout[-500:]}\n"
            f"stderr: {proc.stderr[-500:]}"
        )

    return (
        f"CORA ingest complete.\n\n"
        f"- Project: {parsed['project_name']} (ID: {parsed['project_id']})\n"
        f"- Keyword: {parsed['main_keyword']}\n"
        f"- Job file: {parsed['job_file']}\n\n"
        f"Run `blm_generate_batch` with this job file to generate content."
    )
|
|
|
|
|
|
@tool(
    "blm_generate_batch",
    description=(
        "Standalone content generation: runs Big-Link-Man's generate-batch "
        "command on an existing job file. Use after ingest-cora or for "
        "re-running generation on a manually created job."
    ),
    category="linkbuilding",
)
def blm_generate_batch(
    job_file: str,
    continue_on_error: bool = True,
    debug: bool = False,
    ctx: dict | None = None,
) -> str:
    """Run generate-batch against an existing job file and summarize the result."""
    if not job_file:
        return "Error: job_file is required."

    blm_dir = _get_blm_dir(ctx)

    # Relative job paths are resolved against the BLM checkout.
    candidate = Path(job_file)
    job_path = candidate if candidate.is_absolute() else Path(blm_dir) / job_file
    if not job_path.exists():
        return f"Error: Job file not found: {job_path}"

    cmd_args = ["generate-batch", "-j", str(job_path)]
    for flag, wanted in (("--continue-on-error", continue_on_error), ("--debug", debug)):
        if wanted:
            cmd_args.append(flag)

    timeout_s = _get_blm_timeout(ctx)
    try:
        proc = _run_blm_command(cmd_args, blm_dir, timeout=timeout_s)
    except subprocess.TimeoutExpired:
        return f"Error: generate-batch timed out after {timeout_s // 60} minutes."

    parsed = _parse_generate_output(proc.stdout)

    if proc.returncode != 0:
        return (
            f"Error: generate-batch failed (exit code {proc.returncode}).\n"
            f"stdout: {proc.stdout[-500:]}\n"
            f"stderr: {proc.stderr[-500:]}"
        )

    # "Success" requires the "Job file moved to:" marker in stdout.
    summary = ["Content generation complete.", ""]
    summary.append(f"- Status: {'Success' if parsed['success'] else 'Completed'}")
    if parsed["job_moved_to"]:
        summary.append(f"- Job moved to: {parsed['job_moved_to']}")
    return "\n".join(summary) + "\n"
|
|
|
|
|
|
@tool(
    "scan_cora_folder",
    description=(
        "Scan the Cora inbox watch folder for .xlsx files and report "
        "their processing status. Shows which files are new, processed, "
        "or failed, and whether they match a ClickUp task."
    ),
    category="linkbuilding",
)
def scan_cora_folder(ctx: dict | None = None) -> str:
    """Scan the watch folder and return status of .xlsx files.

    Reads link_building.watch_folder from the agent config, lists the
    .xlsx files it contains, and marks each as 'processed' (a same-named
    file exists in the processed/ subfolder) or 'new'. Excel lock files
    (~$ prefix) are skipped. Also summarizes the processed/ subfolder
    (first 10 files).
    """
    if not ctx or "config" not in ctx:
        return "Error: scan_cora_folder requires agent context."

    config = ctx["config"]
    watch_folder = config.link_building.watch_folder
    if not watch_folder:
        return "Watch folder not configured (link_building.watch_folder is empty)."

    watch_path = Path(watch_folder)
    if not watch_path.exists():
        return f"Watch folder does not exist: {watch_folder}"

    xlsx_files = sorted(watch_path.glob("*.xlsx"))

    if not xlsx_files:
        return f"No .xlsx files found in {watch_folder}."

    lines = [f"## Cora Inbox: {watch_folder}\n"]

    # Glob the processed subfolder once and reuse it for both the status
    # lookup and the summary section (previously globbed twice).
    processed_dir = watch_path / "processed"
    processed = list(processed_dir.glob("*.xlsx")) if processed_dir.exists() else []
    processed_names = {f.name for f in processed}

    for f in xlsx_files:
        filename = f.name
        if filename.startswith("~$"):
            # Skip Excel lock/temp files.
            continue
        status = "processed" if filename in processed_names else "new"
        # BUG FIX: previously printed a literal "(unknown)" instead of the
        # computed filename.
        lines.append(f"- **{filename}** — status: {status}")

    # Check processed subfolder
    if processed:
        lines.append(f"\n### Processed ({len(processed)} files)")
        for f in processed[:10]:
            lines.append(f"- {f.name}")
        if len(processed) > 10:
            lines.append(f"- ... and {len(processed) - 10} more")

    return "\n".join(lines)
|
|
|
|
|
|
@tool(
|
|
"setup_linkbuilding_fields",
|
|
description=(
|
|
"One-time setup tool: creates the required ClickUp custom fields "
|
|
"(LB Method, Keyword, CoraFile, etc.) across all lists in the space. "
|
|
"Safe to re-run — skips fields that already exist."
|
|
),
|
|
category="linkbuilding",
|
|
)
|
|
def setup_linkbuilding_fields(ctx: dict | None = None) -> str:
|
|
"""Create link building custom fields in ClickUp."""
|
|
if not ctx or "config" not in ctx:
|
|
return "Error: requires agent context."
|
|
|
|
config = ctx["config"]
|
|
if not config.clickup.enabled:
|
|
return "Error: ClickUp integration not enabled."
|
|
|
|
cu_client = _get_clickup_client(ctx)
|
|
if not cu_client:
|
|
return "Error: could not create ClickUp client."
|
|
|
|
try:
|
|
space_id = config.clickup.space_id
|
|
list_ids = cu_client.get_list_ids_from_space(space_id)
|
|
if not list_ids:
|
|
return f"No lists found in space {space_id}."
|
|
|
|
fields_to_create = [
|
|
{
|
|
"name": "LB Method",
|
|
"type": "drop_down",
|
|
"type_config": {
|
|
"options": [
|
|
{"name": "Cora Backlinks", "color": "#04A9F4"},
|
|
]
|
|
},
|
|
},
|
|
{"name": "Keyword", "type": "short_text"},
|
|
{"name": "CoraFile", "type": "short_text"},
|
|
{"name": "CustomAnchors", "type": "short_text"},
|
|
{"name": "BrandedPlusRatio", "type": "short_text"},
|
|
{"name": "CLIFlags", "type": "short_text"},
|
|
]
|
|
|
|
results = []
|
|
for list_id in list_ids:
|
|
existing = cu_client.get_custom_fields(list_id)
|
|
existing_names = {f.get("name") for f in existing}
|
|
|
|
for field_def in fields_to_create:
|
|
if field_def["name"] in existing_names:
|
|
continue
|
|
|
|
try:
|
|
cu_client.create_custom_field(
|
|
list_id,
|
|
field_def["name"],
|
|
field_def["type"],
|
|
field_def.get("type_config"),
|
|
)
|
|
results.append(f"Created '{field_def['name']}' in list {list_id}")
|
|
except Exception as e:
|
|
results.append(f"Failed to create '{field_def['name']}' in list {list_id}: {e}")
|
|
|
|
if not results:
|
|
return "All fields already exist in all lists."
|
|
|
|
return "## Setup Results\n\n" + "\n".join(f"- {r}" for r in results)
|
|
|
|
finally:
|
|
cu_client.close()
|