Add link building tool module with pipeline orchestration
- run_link_building: dispatcher that routes to the correct pipeline by LB Method
- run_cora_backlinks: full pipeline (ingest-cora → generate-batch) with ClickUp sync
- blm_ingest_cora: standalone ingest tool
- blm_generate_batch: standalone generate tool
- scan_cora_folder: utility to inspect watch folder contents
- setup_linkbuilding_fields: one-time ClickUp field creation
- Private helpers for subprocess execution, output parsing, fuzzy matching

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
cora-start
parent
e2dca938a1
commit
2d5ed29c0d
|
|
@ -0,0 +1,840 @@
|
|||
"""Link building pipeline tools.
|
||||
|
||||
Orchestrates the Big-Link-Man CLI tool for automated link building.
|
||||
Primary workflow: ingest CORA .xlsx → generate content batch.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from . import tool
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _get_blm_dir(ctx: dict | None) -> str:
|
||||
"""Resolve the Big-Link-Man directory from config or env."""
|
||||
if ctx and "config" in ctx:
|
||||
return ctx["config"].link_building.blm_dir
|
||||
return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man")
|
||||
|
||||
|
||||
def _run_blm_command(args: list[str], blm_dir: str, timeout: int = 1800) -> subprocess.CompletedProcess:
    """Run a Big-Link-Man CLI command via subprocess.

    The command is ``uv run python main.py <args>`` executed with ``blm_dir``
    as the working directory. Credentials from the ``BLM_USERNAME`` /
    ``BLM_PASSWORD`` environment variables are appended as ``-u`` / ``-p``
    unless the caller already supplied the corresponding flag.

    Args:
        args: CLI arguments placed after ``main.py``.
        blm_dir: Working directory for the subprocess.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Returns:
        The ``CompletedProcess`` with stdout and stderr captured as text.

    Raises:
        subprocess.TimeoutExpired: If the command runs longer than ``timeout``.
    """
    cmd = ["uv", "run", "python", "main.py", *args]

    # Inject credentials from env vars
    username = os.getenv("BLM_USERNAME", "")
    password = os.getenv("BLM_PASSWORD", "")
    if username and "-u" not in args and "--username" not in args:
        cmd.extend(["-u", username])
    if password and "-p" not in args and "--password" not in args:
        cmd.extend(["-p", password])

    # Build a log-safe copy: the value following a password flag is masked so
    # the secret never reaches the log output, whether it came from the
    # environment or from the caller's own args.
    loggable: list[str] = []
    mask_next = False
    for token in cmd:
        loggable.append("***" if mask_next else token)
        mask_next = token in ("-p", "--password")

    log.info("Running BLM command: %s (cwd=%s)", " ".join(loggable), blm_dir)
    result = subprocess.run(
        cmd,
        cwd=blm_dir,
        capture_output=True,
        text=True,
        timeout=timeout,
    )
    log.info("BLM exit code: %d", result.returncode)
    # Only the first 1000 characters of each stream are logged
    if result.stdout:
        log.debug("BLM stdout: %s", result.stdout[:1000])
    if result.stderr:
        log.debug("BLM stderr: %s", result.stderr[:1000])
    return result
|
||||
|
||||
|
||||
def _build_ingest_args(
    xlsx_path: str,
    project_name: str,
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
) -> list[str]:
    """Assemble the argument vector for the ``ingest-cora`` subcommand.

    Args:
        xlsx_path: Value for ``-f``.
        project_name: Value for ``-n``.
        money_site_url: Value for ``-m``; a placeholder URL is used when empty.
        branded_plus_ratio: Emitted as ``-bp`` only when truthy and not 0.7.
        custom_anchors: Emitted as ``-a`` when non-empty.
        cli_flags: Extra flags, split on whitespace and appended verbatim.

    Returns:
        The argument list, starting with ``"ingest-cora"``.
    """
    # -m is always supplied so the CLI never falls back to an interactive
    # stdin prompt; a placeholder stands in when no URL was given.
    target_url = money_site_url or "https://placeholder.example.com"
    argv = ["ingest-cora", "-f", xlsx_path, "-n", project_name, "-m", target_url]

    ratio_is_custom = bool(branded_plus_ratio) and branded_plus_ratio != 0.7
    if ratio_is_custom:
        argv += ["-bp", str(branded_plus_ratio)]

    if custom_anchors:
        argv += ["-a", custom_anchors]

    if cli_flags:
        # Plain whitespace split: quoting inside cli_flags is not interpreted
        argv += cli_flags.split()

    return argv
|
||||
|
||||
|
||||
def _parse_ingest_output(stdout: str) -> dict:
    """Extract project details from the text printed by ``ingest-cora``.

    Each line is stripped and matched against three line formats:

    * ``Success: Project '<name>' created (ID: <digits>)``
    * ``Job file created: <path>``
    * ``Main Keyword: <keyword>``

    Returns:
        Dict with keys ``project_id``, ``job_file``, ``project_name`` and
        ``main_keyword``. Keys whose line never appeared stay ``""``; when a
        line format appears more than once the last occurrence wins.
    """
    parsed = dict.fromkeys(("project_id", "job_file", "project_name", "main_keyword"), "")

    # Line formats that capture exactly one value into one key
    single_value_patterns = (
        (r"^Job file created: (.+)$", "job_file"),
        (r"^Main Keyword: (.+)$", "main_keyword"),
    )

    for raw_line in stdout.splitlines():
        text = raw_line.strip()

        created = re.match(r"^Success: Project '(.+)' created \(ID: (\d+)\)$", text)
        if created is not None:
            parsed["project_name"], parsed["project_id"] = created.groups()
            continue

        for pattern, key in single_value_patterns:
            found = re.match(pattern, text)
            if found is not None:
                parsed[key] = found.group(1).strip()
                break

    return parsed
|
||||
|
||||
|
||||
def _parse_generate_output(stdout: str) -> dict:
    """Extract completion info from the text printed by ``generate-batch``.

    Looks for stripped lines of the form ``Job file moved to: <path>``.

    Returns:
        Dict with ``job_moved_to`` (path from the last matching line, or
        ``""``), ``success`` (True only if such a line was seen) and
        ``raw_output`` (the unmodified ``stdout``).
    """
    moved_to = ""
    succeeded = False

    for raw_line in stdout.splitlines():
        hit = re.match(r"^Job file moved to: (.+)$", raw_line.strip())
        if hit is None:
            continue
        moved_to = hit.group(1).strip()
        succeeded = True

    return {
        "job_moved_to": moved_to,
        "success": succeeded,
        "raw_output": stdout,
    }
|
||||
|
||||
|
||||
def _set_status(ctx: dict | None, message: str) -> None:
|
||||
"""Write pipeline progress to KV store for UI polling."""
|
||||
if ctx and "db" in ctx:
|
||||
ctx["db"].kv_set("linkbuilding:status", message)
|
||||
|
||||
|
||||
def _get_clickup_client(ctx: dict | None):
|
||||
"""Create a ClickUpClient from tool context, or None if unavailable."""
|
||||
if not ctx or not ctx.get("config") or not ctx["config"].clickup.enabled:
|
||||
return None
|
||||
try:
|
||||
from ..clickup import ClickUpClient
|
||||
|
||||
config = ctx["config"]
|
||||
return ClickUpClient(
|
||||
api_token=config.clickup.api_token,
|
||||
workspace_id=config.clickup.workspace_id,
|
||||
task_type_field_name=config.clickup.task_type_field_name,
|
||||
)
|
||||
except Exception as e:
|
||||
log.warning("Could not create ClickUp client: %s", e)
|
||||
return None
|
||||
|
||||
|
||||
def _sync_clickup(ctx: dict | None, task_id: str, step: str, message: str) -> None:
|
||||
"""Post a comment to ClickUp and update KV state."""
|
||||
if not task_id or not ctx:
|
||||
return
|
||||
|
||||
# Update KV store
|
||||
db = ctx.get("db")
|
||||
if db:
|
||||
kv_key = f"clickup:task:{task_id}:state"
|
||||
raw = db.kv_get(kv_key)
|
||||
if raw:
|
||||
try:
|
||||
state = json.loads(raw)
|
||||
state["last_step"] = step
|
||||
state["last_message"] = message
|
||||
db.kv_set(kv_key, json.dumps(state))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Post comment to ClickUp
|
||||
cu_client = _get_clickup_client(ctx)
|
||||
if cu_client:
|
||||
try:
|
||||
cu_client.add_comment(task_id, message)
|
||||
except Exception as e:
|
||||
log.warning("ClickUp comment failed for task %s: %s", task_id, e)
|
||||
finally:
|
||||
cu_client.close()
|
||||
|
||||
|
||||
def _find_clickup_task(ctx: dict, keyword: str) -> str:
    """Find a ClickUp Link Building task matching the given keyword.

    Queries the configured space for "to do" tasks, keeps those whose
    ``task_type`` is "Link Building", and fuzzy-matches their "Keyword"
    custom field against ``keyword``. On the first match an "executing"
    state record is written to the KV store (when a db is available) and the
    task is moved to the configured in-progress status.

    Args:
        ctx: Tool context providing "config" and optionally "db".
        keyword: Keyword to look for.

    Returns:
        The matching task id, or "" when no client is available, no space is
        configured, the query fails, or nothing matches.
    """
    cu_client = _get_clickup_client(ctx)
    if not cu_client:
        return ""

    # One client serves both the query and the status update; the single
    # finally guarantees it is closed on every exit path, including the
    # early return for a missing space id.
    try:
        config = ctx.get("config")
        if not config or not config.clickup.space_id:
            return ""

        try:
            tasks = cu_client.get_tasks_from_space(
                config.clickup.space_id,
                statuses=["to do"],
            )
        except Exception as e:
            log.warning("ClickUp query failed in _find_clickup_task: %s", e)
            return ""

        keyword_norm = _normalize_for_match(keyword)

        for task in tasks:
            if task.task_type != "Link Building":
                continue

            task_keyword = task.custom_fields.get("Keyword", "")
            if not task_keyword:
                continue

            if not _fuzzy_keyword_match(keyword_norm, _normalize_for_match(str(task_keyword))):
                continue

            # Found a match — create executing state
            task_id = task.id
            now = datetime.now(UTC).isoformat()
            state = {
                "state": "executing",
                "clickup_task_id": task_id,
                "clickup_task_name": task.name,
                "task_type": task.task_type,
                "skill_name": "run_link_building",
                "discovered_at": now,
                "started_at": now,
                "completed_at": None,
                "error": None,
                "deliverable_paths": [],
                "custom_fields": task.custom_fields,
            }

            db = ctx.get("db")
            if db:
                db.kv_set(f"clickup:task:{task_id}:state", json.dumps(state))

            # Move to "in progress"; a failure here is logged but does not
            # prevent the match from being returned.
            try:
                cu_client.update_task_status(task_id, config.clickup.in_progress_status)
            except Exception as e:
                log.warning("Failed to update ClickUp status for %s: %s", task_id, e)

            log.info("Auto-matched ClickUp task %s for keyword '%s'", task_id, keyword)
            return task_id

        return ""
    finally:
        cu_client.close()
|
||||
|
||||
|
||||
def _normalize_for_match(text: str) -> str:
    """Canonicalize text for fuzzy comparison.

    Lowercases the input, turns every character other than ``a-z``, ``0-9``
    and whitespace into a space, then collapses whitespace runs into single
    spaces with no leading or trailing space.
    """
    only_alnum = re.sub(r"[^a-z0-9\s]", " ", text.lower())
    # split() without arguments drops empty pieces, so joining on a single
    # space both collapses whitespace runs and trims the ends.
    return " ".join(only_alnum.split())
|
||||
|
||||
|
||||
def _fuzzy_keyword_match(a: str, b: str) -> bool:
    """Decide whether two already-normalized strings refer to the same keyword.

    They match when they are equal, when either string contains the other,
    or when the words they share make up at least 80% of the smaller of the
    two word sets. An empty string never matches.
    """
    if not (a and b):
        return False
    if a == b or a in b or b in a:
        return True

    # Compare as word sets, so order and repetition do not matter
    tokens_a, tokens_b = set(a.split()), set(b.split())
    if not (tokens_a and tokens_b):
        return False

    shared = tokens_a & tokens_b
    smaller = min(len(tokens_a), len(tokens_b))  # at least 1: both sets are non-empty
    return len(shared) / smaller >= 0.8
|
||||
|
||||
|
||||
def _complete_clickup_task(ctx: dict | None, task_id: str, message: str, status: str = "") -> None:
|
||||
"""Mark a ClickUp task as completed and update KV state."""
|
||||
if not task_id or not ctx:
|
||||
return
|
||||
|
||||
config = ctx.get("config")
|
||||
skill_map = config.clickup.skill_map if config else {}
|
||||
lb_map = skill_map.get("Link Building", {})
|
||||
complete_status = status or lb_map.get("complete_status", "complete")
|
||||
|
||||
db = ctx.get("db")
|
||||
if db:
|
||||
kv_key = f"clickup:task:{task_id}:state"
|
||||
raw = db.kv_get(kv_key)
|
||||
if raw:
|
||||
try:
|
||||
state = json.loads(raw)
|
||||
state["state"] = "completed"
|
||||
state["completed_at"] = datetime.now(UTC).isoformat()
|
||||
db.kv_set(kv_key, json.dumps(state))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
cu_client = _get_clickup_client(ctx)
|
||||
if cu_client:
|
||||
try:
|
||||
cu_client.add_comment(task_id, message)
|
||||
cu_client.update_task_status(task_id, complete_status)
|
||||
except Exception as e:
|
||||
log.warning("ClickUp completion failed for %s: %s", task_id, e)
|
||||
finally:
|
||||
cu_client.close()
|
||||
|
||||
|
||||
def _fail_clickup_task(ctx: dict | None, task_id: str, error_msg: str) -> None:
|
||||
"""Mark a ClickUp task as failed and update KV state."""
|
||||
if not task_id or not ctx:
|
||||
return
|
||||
|
||||
config = ctx.get("config")
|
||||
skill_map = config.clickup.skill_map if config else {}
|
||||
lb_map = skill_map.get("Link Building", {})
|
||||
error_status = lb_map.get("error_status", "internal review")
|
||||
|
||||
db = ctx.get("db")
|
||||
if db:
|
||||
kv_key = f"clickup:task:{task_id}:state"
|
||||
raw = db.kv_get(kv_key)
|
||||
if raw:
|
||||
try:
|
||||
state = json.loads(raw)
|
||||
state["state"] = "failed"
|
||||
state["error"] = error_msg
|
||||
state["completed_at"] = datetime.now(UTC).isoformat()
|
||||
db.kv_set(kv_key, json.dumps(state))
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
cu_client = _get_clickup_client(ctx)
|
||||
if cu_client:
|
||||
try:
|
||||
cu_client.add_comment(
|
||||
task_id,
|
||||
f"❌ Link building pipeline failed.\n\nError: {error_msg[:2000]}",
|
||||
)
|
||||
cu_client.update_task_status(task_id, error_status)
|
||||
except Exception as e:
|
||||
log.warning("ClickUp failure update failed for %s: %s", task_id, e)
|
||||
finally:
|
||||
cu_client.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@tool(
    "run_link_building",
    description=(
        "Orchestrator for link building pipelines. Reads the LB Method and "
        "routes to the correct pipeline tool (e.g., run_cora_backlinks for "
        "'Cora Backlinks'). Use when a ClickUp task or chat command requests "
        "link building without specifying the exact pipeline."
    ),
    category="linkbuilding",
)
def run_link_building(
    lb_method: str = "",
    xlsx_path: str = "",
    project_name: str = "",
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Route a link building request to the pipeline named by ``lb_method``.

    An empty ``lb_method`` is treated as "Cora Backlinks", currently the only
    supported method. All remaining arguments are forwarded unchanged to
    ``run_cora_backlinks``.

    Returns:
        The pipeline's report, a "Skipped: ..." notice when no ``xlsx_path``
        was given, or an "Unknown LB Method" message.
    """
    method = (lb_method or "Cora Backlinks").strip()

    if method != "Cora Backlinks":
        return f"Unknown LB Method: '{method}'. Supported methods: Cora Backlinks"

    # For Cora Backlinks, xlsx_path is required
    if not xlsx_path:
        return (
            "Skipped: Cora Backlinks requires an xlsx_path. "
            "The folder watcher will trigger this pipeline when a .xlsx "
            "file appears in the watch folder. Or provide xlsx_path explicitly."
        )

    pipeline_kwargs = {
        "xlsx_path": xlsx_path,
        "project_name": project_name,
        "money_site_url": money_site_url,
        "branded_plus_ratio": branded_plus_ratio,
        "custom_anchors": custom_anchors,
        "cli_flags": cli_flags,
        "ctx": ctx,
    }
    return run_cora_backlinks(**pipeline_kwargs)
|
||||
|
||||
|
||||
@tool(
    "run_cora_backlinks",
    description=(
        "Full Cora Backlinks pipeline: ingests a CORA .xlsx report via "
        "Big-Link-Man's ingest-cora command, then runs generate-batch to "
        "produce content. Requires xlsx_path and project_name. Optionally "
        "syncs with ClickUp task."
    ),
    category="linkbuilding",
)
def run_cora_backlinks(
    xlsx_path: str,
    project_name: str,
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Run the full Cora Backlinks pipeline: ingest-cora → generate-batch.

    Progress is written to the KV status key while running and cleared at the
    end. When a ClickUp task id is supplied in ``ctx["clickup_task_id"]`` or
    found by keyword lookup on ``project_name``, the task receives progress
    comments and is completed or failed to match the outcome.

    Args:
        xlsx_path: Path to the CORA .xlsx report; must exist.
        project_name: Project name passed to ingest-cora.
        money_site_url: Optional money-site URL for ingest-cora.
        branded_plus_ratio: Ratio for ingest-cora; values that cannot be
            converted to float, and falsy values, fall back to 0.7.
        custom_anchors: Optional custom anchors for ingest-cora.
        cli_flags: Extra whitespace-separated flags for ingest-cora.
        ctx: Tool context (config, db, optional clickup_task_id).

    Returns:
        A markdown report of both steps, or a string containing
        "Error: ..." when validation or either step fails. Failures to
        launch the subprocess are reported this way too rather than raised.
    """
    if not xlsx_path:
        return "Error: xlsx_path is required for Cora Backlinks pipeline."
    if not project_name:
        return "Error: project_name is required for Cora Backlinks pipeline."

    blm_dir = _get_blm_dir(ctx)

    # Check if xlsx file exists
    if not Path(xlsx_path).exists():
        return f"Error: CORA file not found: {xlsx_path}"

    # Try to find matching ClickUp task
    clickup_task_id = ""
    if ctx:
        clickup_task_id = ctx.get("clickup_task_id", "")
        if not clickup_task_id:
            # Auto-lookup from keyword (derive from project name)
            clickup_task_id = _find_clickup_task(ctx, project_name)

    output_parts: list[str] = []

    def _abort(error: str) -> str:
        """Clear the UI status, fail the ClickUp task, and build the error reply."""
        _set_status(ctx, "")
        if clickup_task_id:
            _fail_clickup_task(ctx, clickup_task_id, error)
        # Anything already reported (step 1 results) is kept ahead of the error
        if output_parts:
            return "\n".join(output_parts) + f"\n\nError: {error}"
        return f"Error: {error}"

    # ── Step 1: ingest-cora ──
    _set_status(ctx, f"Step 1/2: Ingesting CORA report for {project_name}...")
    if clickup_task_id:
        _sync_clickup(ctx, clickup_task_id, "ingest", "🔄 Starting Cora Backlinks pipeline...")

    # Convert branded_plus_ratio from string if needed
    try:
        bp_ratio = float(branded_plus_ratio) if branded_plus_ratio else 0.7
    except (ValueError, TypeError):
        bp_ratio = 0.7

    ingest_args = _build_ingest_args(
        xlsx_path=xlsx_path,
        project_name=project_name,
        money_site_url=money_site_url,
        branded_plus_ratio=bp_ratio,
        custom_anchors=custom_anchors,
        cli_flags=cli_flags,
    )

    try:
        ingest_result = _run_blm_command(ingest_args, blm_dir)
    except subprocess.TimeoutExpired:
        return _abort("ingest-cora timed out after 30 minutes")
    except OSError as exc:
        # Launch failures (executable or working directory missing) must not
        # leave the status message and the ClickUp task stuck mid-pipeline.
        return _abort(f"ingest-cora could not be started: {exc}")

    ingest_parsed = _parse_ingest_output(ingest_result.stdout)

    # A zero exit code without a reported job file still counts as failure,
    # because step 2 has nothing to run on.
    if ingest_result.returncode != 0 or not ingest_parsed["job_file"]:
        return _abort(
            f"ingest-cora failed (exit code {ingest_result.returncode}).\n"
            f"stdout: {ingest_result.stdout[-500:]}\n"
            f"stderr: {ingest_result.stderr[-500:]}"
        )

    project_id = ingest_parsed["project_id"]
    job_file = ingest_parsed["job_file"]

    output_parts.append("## Step 1: Ingest CORA Report")
    output_parts.append(f"- Project: {project_name} (ID: {project_id})")
    output_parts.append(f"- Keyword: {ingest_parsed['main_keyword']}")
    output_parts.append(f"- Job file: {job_file}")
    output_parts.append("")

    if clickup_task_id:
        _sync_clickup(
            ctx,
            clickup_task_id,
            "ingest_done",
            f"✅ CORA report ingested. Project ID: {project_id}. Job file: {job_file}",
        )

    # ── Step 2: generate-batch ──
    _set_status(ctx, f"Step 2/2: Generating content batch for {project_name}...")

    # Build the job file path (may be relative to BLM dir)
    job_path = Path(blm_dir) / job_file if not Path(job_file).is_absolute() else Path(job_file)

    gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]

    try:
        gen_result = _run_blm_command(gen_args, blm_dir)
    except subprocess.TimeoutExpired:
        return _abort("generate-batch timed out after 30 minutes")
    except OSError as exc:
        return _abort(f"generate-batch could not be started: {exc}")

    gen_parsed = _parse_generate_output(gen_result.stdout)

    if gen_result.returncode != 0:
        return _abort(
            f"generate-batch failed (exit code {gen_result.returncode}).\n"
            f"stdout: {gen_result.stdout[-500:]}\n"
            f"stderr: {gen_result.stderr[-500:]}"
        )

    output_parts.append("## Step 2: Generate Content Batch")
    output_parts.append(f"- Status: {'Success' if gen_parsed['success'] else 'Completed'}")
    if gen_parsed["job_moved_to"]:
        output_parts.append(f"- Job moved to: {gen_parsed['job_moved_to']}")
    output_parts.append("")

    # ── Completion ──
    _set_status(ctx, "")

    if clickup_task_id:
        summary = (
            f"✅ Cora Backlinks pipeline completed for {project_name}.\n\n"
            f"Project ID: {project_id}\n"
            f"Keyword: {ingest_parsed['main_keyword']}\n"
            f"Job file: {gen_parsed['job_moved_to'] or job_file}"
        )
        _complete_clickup_task(ctx, clickup_task_id, summary)

        output_parts.append("## ClickUp Sync")
        output_parts.append(f"- Task `{clickup_task_id}` completed")
        output_parts.append("- Status set to 'complete'")

    return "\n".join(output_parts)
|
||||
|
||||
|
||||
@tool(
    "blm_ingest_cora",
    description=(
        "Standalone CORA ingest: runs Big-Link-Man's ingest-cora command "
        "to parse a CORA .xlsx report and create a project. Returns the "
        "project ID and job file path without running generate-batch."
    ),
    category="linkbuilding",
)
def blm_ingest_cora(
    xlsx_path: str,
    project_name: str,
    money_site_url: str = "",
    branded_plus_ratio: float = 0.7,
    custom_anchors: str = "",
    cli_flags: str = "",
    ctx: dict | None = None,
) -> str:
    """Run only the ingest-cora step and report what it produced.

    Args:
        xlsx_path: Path to the CORA .xlsx report; must exist.
        project_name: Project name passed to ingest-cora.
        money_site_url: Optional money-site URL.
        branded_plus_ratio: Ratio for ingest-cora; falsy or unconvertible
            values fall back to 0.7.
        custom_anchors: Optional custom anchors.
        cli_flags: Extra whitespace-separated flags.
        ctx: Tool context used to locate the Big-Link-Man directory.

    Returns:
        A summary with project name, id, keyword and job file, or an
        "Error: ..." string on validation failure, timeout, non-zero exit, or
        when no job file was reported.
    """
    required = (
        (xlsx_path, "Error: xlsx_path is required."),
        (project_name, "Error: project_name is required."),
    )
    for value, complaint in required:
        if not value:
            return complaint

    blm_dir = _get_blm_dir(ctx)

    if not Path(xlsx_path).exists():
        return f"Error: CORA file not found: {xlsx_path}"

    # The ratio may arrive as a string; anything unusable becomes 0.7
    bp_ratio = 0.7
    try:
        if branded_plus_ratio:
            bp_ratio = float(branded_plus_ratio)
    except (ValueError, TypeError):
        bp_ratio = 0.7

    ingest_args = _build_ingest_args(
        xlsx_path=xlsx_path,
        project_name=project_name,
        money_site_url=money_site_url,
        branded_plus_ratio=bp_ratio,
        custom_anchors=custom_anchors,
        cli_flags=cli_flags,
    )

    try:
        completed = _run_blm_command(ingest_args, blm_dir)
    except subprocess.TimeoutExpired:
        return "Error: ingest-cora timed out after 30 minutes."

    info = _parse_ingest_output(completed.stdout)

    succeeded = completed.returncode == 0 and bool(info["job_file"])
    if not succeeded:
        return (
            f"Error: ingest-cora failed (exit code {completed.returncode}).\n"
            f"stdout: {completed.stdout[-500:]}\n"
            f"stderr: {completed.stderr[-500:]}"
        )

    report_lines = [
        "CORA ingest complete.",
        "",
        f"- Project: {info['project_name']} (ID: {info['project_id']})",
        f"- Keyword: {info['main_keyword']}",
        f"- Job file: {info['job_file']}",
        "",
        "Run `blm_generate_batch` with this job file to generate content.",
    ]
    return "\n".join(report_lines)
|
||||
|
||||
|
||||
@tool(
    "blm_generate_batch",
    description=(
        "Standalone content generation: runs Big-Link-Man's generate-batch "
        "command on an existing job file. Use after ingest-cora or for "
        "re-running generation on a manually created job."
    ),
    category="linkbuilding",
)
def blm_generate_batch(
    job_file: str,
    continue_on_error: bool = True,
    debug: bool = False,
    ctx: dict | None = None,
) -> str:
    """Run only the generate-batch step on an existing job file.

    Args:
        job_file: Job file path; a relative path is resolved against the
            Big-Link-Man directory.
        continue_on_error: Pass ``--continue-on-error`` to the CLI.
        debug: Pass ``--debug`` to the CLI.
        ctx: Tool context used to locate the Big-Link-Man directory.

    Returns:
        A short completion summary, or an "Error: ..." string when the job
        file is missing, the command times out, or it exits non-zero.
    """
    if not job_file:
        return "Error: job_file is required."

    blm_dir = _get_blm_dir(ctx)
    candidate = Path(job_file)
    job_path = candidate if candidate.is_absolute() else Path(blm_dir) / job_file

    if not job_path.exists():
        return f"Error: Job file not found: {job_path}"

    optional_flags = (
        ("--continue-on-error", continue_on_error),
        ("--debug", debug),
    )
    args = ["generate-batch", "-j", str(job_path)]
    args += [flag for flag, wanted in optional_flags if wanted]

    try:
        completed = _run_blm_command(args, blm_dir)
    except subprocess.TimeoutExpired:
        return "Error: generate-batch timed out after 30 minutes."

    outcome = _parse_generate_output(completed.stdout)

    if completed.returncode != 0:
        return (
            f"Error: generate-batch failed (exit code {completed.returncode}).\n"
            f"stdout: {completed.stdout[-500:]}\n"
            f"stderr: {completed.stderr[-500:]}"
        )

    # "Success" is reported only when the CLI announced it moved the job file
    label = "Success" if outcome["success"] else "Completed"
    pieces = ["Content generation complete.\n\n", f"- Status: {label}\n"]
    if outcome["job_moved_to"]:
        pieces.append(f"- Job moved to: {outcome['job_moved_to']}\n")
    return "".join(pieces)
|
||||
|
||||
|
||||
@tool(
    "scan_cora_folder",
    description=(
        "Scan the Cora inbox watch folder for .xlsx files and report "
        "their processing status. Shows which files are new, processed, "
        "or failed, and whether they match a ClickUp task."
    ),
    category="linkbuilding",
)
def scan_cora_folder(ctx: dict | None = None) -> str:
    """Scan the watch folder and return the status of its .xlsx files.

    Each file in the configured ``link_building.watch_folder`` is listed with
    the status recorded under the ``linkbuilding:watched:<filename>`` KV key
    ("new" when nothing is recorded). Up to ten files from the ``processed``
    subfolder are listed afterwards, in name order.

    Args:
        ctx: Tool context; must contain "config", may contain "db".

    Returns:
        A markdown listing, or a one-line message when context or watch
        folder is missing or the folder holds no .xlsx files.
    """
    if not ctx or "config" not in ctx:
        return "Error: scan_cora_folder requires agent context."

    config = ctx["config"]
    watch_folder = config.link_building.watch_folder
    if not watch_folder:
        return "Watch folder not configured (link_building.watch_folder is empty)."

    watch_path = Path(watch_folder)
    if not watch_path.exists():
        return f"Watch folder does not exist: {watch_folder}"

    db = ctx.get("db")
    xlsx_files = sorted(watch_path.glob("*.xlsx"))

    if not xlsx_files:
        return f"No .xlsx files found in {watch_folder}."

    lines = [f"## Cora Inbox: {watch_folder}\n"]

    for f in xlsx_files:
        filename = f.name
        status = "new"
        kv_val = db.kv_get(f"linkbuilding:watched:{filename}") if db else None
        if kv_val:
            try:
                watched = json.loads(kv_val)
            except json.JSONDecodeError:
                status = "tracked"
            else:
                # A tracked value that is valid JSON but not an object has no
                # "status" key to read; report it like any other opaque entry.
                if isinstance(watched, dict):
                    status = watched.get("status", "unknown")
                else:
                    status = "tracked"

        lines.append(f"- **{filename}** — status: {status}")

    # Check processed subfolder
    processed_dir = watch_path / "processed"
    if processed_dir.exists():
        # Sorted so the ten names shown are stable across runs
        processed = sorted(processed_dir.glob("*.xlsx"))
        if processed:
            lines.append(f"\n### Processed ({len(processed)} files)")
            lines.extend(f"- {f.name}" for f in processed[:10])
            if len(processed) > 10:
                lines.append(f"- ... and {len(processed) - 10} more")

    return "\n".join(lines)
|
||||
|
||||
|
||||
@tool(
    "setup_linkbuilding_fields",
    description=(
        "One-time setup tool: creates the required ClickUp custom fields "
        "(LB Method, Keyword, CoraFile, etc.) across all lists in the space. "
        "Safe to re-run — skips fields that already exist."
    ),
    category="linkbuilding",
)
def setup_linkbuilding_fields(ctx: dict | None = None) -> str:
    """Ensure every list in the ClickUp space has the link building fields.

    For each list in the configured space, existing custom field names are
    fetched and any of the required fields not yet present is created.
    Per-field creation errors are collected into the report instead of
    stopping the run.

    Args:
        ctx: Tool context; must contain a "config" with ClickUp enabled.

    Returns:
        A markdown list of created/failed fields, a note that nothing was
        missing, or an "Error: ..." / "No lists found ..." message.
    """
    if not ctx or "config" not in ctx:
        return "Error: requires agent context."

    config = ctx["config"]
    if not config.clickup.enabled:
        return "Error: ClickUp integration not enabled."

    client = _get_clickup_client(ctx)
    if not client:
        return "Error: could not create ClickUp client."

    # Only the drop-down needs a type_config; the rest are plain text fields
    lb_method_field = {
        "name": "LB Method",
        "type": "drop_down",
        "type_config": {
            "options": [
                {"name": "Cora Backlinks", "color": "#04A9F4"},
            ]
        },
    }
    text_field_names = ("Keyword", "CoraFile", "CustomAnchors", "BrandedPlusRatio", "CLIFlags")
    wanted_fields = [lb_method_field]
    wanted_fields += [{"name": label, "type": "short_text"} for label in text_field_names]

    try:
        space_id = config.clickup.space_id
        list_ids = client.get_list_ids_from_space(space_id)
        if not list_ids:
            return f"No lists found in space {space_id}."

        report = []
        for list_id in list_ids:
            present = {entry.get("name") for entry in client.get_custom_fields(list_id)}
            missing = (spec for spec in wanted_fields if spec["name"] not in present)

            for spec in missing:
                field_name = spec["name"]
                try:
                    client.create_custom_field(
                        list_id,
                        field_name,
                        spec["type"],
                        spec.get("type_config"),
                    )
                except Exception as exc:
                    report.append(f"Failed to create '{field_name}' in list {list_id}: {exc}")
                else:
                    report.append(f"Created '{field_name}' in list {list_id}")

        if not report:
            return "All fields already exist in all lists."

        return "## Setup Results\n\n" + "\n".join(f"- {entry}" for entry in report)
    finally:
        client.close()
|
||||
Loading…
Reference in New Issue