CheddahBot/clickup_runner/blm.py

263 lines
7.4 KiB
Python

"""Big-Link-Man CLI runner.
Runs ingest-cora and generate-batch via BLM's own venv Python.
Ported from cheddahbot/tools/linkbuilding.py for headless use.
"""
from __future__ import annotations
import logging
import os
import re
import subprocess
from dataclasses import dataclass
from pathlib import Path
# Module-level logger, named after this module; used by _run_blm and find_cora_xlsx.
log = logging.getLogger(__name__)
@dataclass
class IngestResult:
    """Outcome of one ``ingest-cora`` invocation.

    Only ``success`` is required; every other field defaults to an empty
    string so a failure can be reported with nothing but ``error`` set.
    """

    # True once the "Success: Project ... created" line has been parsed.
    success: bool
    # Numeric ID captured from the success line, kept as a string.
    project_id: str = ""
    # Project name captured from the success line.
    project_name: str = ""
    # Value of the "Main Keyword:" output line.
    main_keyword: str = ""
    # Value of the "Job file created:" output line.
    job_file: str = ""
    # Human-readable failure description; empty when nothing went wrong.
    error: str = ""
@dataclass
class GenerateResult:
    """Outcome of one ``generate-batch`` invocation.

    Only ``success`` is required; the remaining fields default to empty
    strings.
    """

    # True once a "Job file moved to:" line has been parsed.
    success: bool
    # Destination path captured from the "Job file moved to:" line.
    job_moved_to: str = ""
    # Human-readable failure description; empty when nothing went wrong.
    error: str = ""
def _resolve_venv_python(blm_dir: str) -> Path:
    """Locate the Python interpreter inside BLM's own ``.venv``.

    The Windows layout (``.venv/Scripts/python.exe``) is probed first, then
    the Linux/Mac layout (``.venv/bin/python``).

    Args:
        blm_dir: Root directory of the BLM checkout.

    Returns:
        Path to the first interpreter candidate that exists.

    Raises:
        FileNotFoundError: If neither candidate exists.
    """
    venv_root = Path(blm_dir) / ".venv"
    candidates = (
        venv_root / "Scripts" / "python.exe",  # Windows
        venv_root / "bin" / "python",  # Linux/Mac
    )
    for interpreter in candidates:
        if interpreter.exists():
            return interpreter
    raise FileNotFoundError(
        "No .venv found in %s. BLM must have its own venv." % blm_dir
    )
def _redact_password(cmd: list[str]) -> list[str]:
    """Return a copy of *cmd* that is safe to log (password values masked)."""
    safe: list[str] = []
    mask_next = False
    for token in cmd:
        if mask_next:
            safe.append("***")
            mask_next = False
        elif token.startswith("--password="):
            safe.append("--password=***")
        else:
            safe.append(token)
            mask_next = token in ("-p", "--password")
    return safe


def _run_blm(
    args: list[str], blm_dir: str, timeout: int = 1800
) -> subprocess.CompletedProcess:
    """Run a BLM CLI command with credential injection.

    The command is ``<venv python> main.py <args...>`` executed with
    ``blm_dir`` as the working directory. ``BLM_USERNAME`` / ``BLM_PASSWORD``
    from the environment are appended as ``-u`` / ``-p`` unless the caller
    already supplied the corresponding flag in ``args``.

    Args:
        args: BLM sub-command and its arguments.
        blm_dir: Root directory of the BLM checkout (must contain ``.venv``).
        timeout: Seconds to wait before the run is aborted.

    Returns:
        The completed process with stdout/stderr captured as text.

    Raises:
        FileNotFoundError: If BLM's venv interpreter cannot be found.
        subprocess.TimeoutExpired: If the command exceeds ``timeout``.
    """
    venv_python = _resolve_venv_python(blm_dir)
    cmd = [str(venv_python), "main.py", *args]

    # Inject credentials from env vars, never overriding explicit flags.
    username = os.getenv("BLM_USERNAME", "")
    password = os.getenv("BLM_PASSWORD", "")
    if username and "-u" not in args and "--username" not in args:
        cmd.extend(["-u", username])
    if password and "-p" not in args and "--password" not in args:
        cmd.extend(["-p", password])

    # Log a redacted copy: the real command carries the password in clear
    # text and must not end up in log files.
    log.info("BLM command: %s (cwd=%s)", " ".join(_redact_password(cmd)), blm_dir)

    result = subprocess.run(
        cmd,
        cwd=blm_dir,
        capture_output=True,
        text=True,
        timeout=timeout,
    )

    log.info("BLM exit code: %d", result.returncode)
    # Output is truncated so a chatty run does not flood the debug log.
    if result.stdout:
        log.debug("BLM stdout: %s", result.stdout[:2000])
    if result.stderr:
        log.debug("BLM stderr: %s", result.stderr[:2000])
    return result
def find_cora_xlsx(keyword: str, cora_inbox: str) -> str | None:
    """Find the Cora xlsx in the inbox directory by keyword match.

    The keyword is slugified (lowercased, stripped, spaces turned into
    underscores, every character outside ``[a-z0-9_]`` dropped) and compared
    against the lowercased stem of each ``*.xlsx`` file in ``cora_inbox``.
    An exact stem match wins over a prefix match; within each category the
    most recently modified file wins.

    Args:
        keyword: Keyword the Cora report was generated for.
        cora_inbox: Directory that holds the Cora xlsx exports.

    Returns:
        Full path of the matching file as a string, or ``None`` when the
        inbox is missing, the keyword has no usable characters, or nothing
        matches.
    """
    inbox = Path(cora_inbox)
    if not inbox.is_dir():
        log.warning("Cora inbox not found: %s", cora_inbox)
        return None

    # Slugify keyword for matching: lowercase, spaces -> underscores
    slug = keyword.lower().strip().replace(" ", "_")
    slug = re.sub(r"[^a-z0-9_]", "", slug)
    if not slug:
        # An empty slug is a prefix of every filename, so it would silently
        # select the newest unrelated xlsx. Treat it as "no match" instead.
        log.warning("No xlsx matching '%s' in %s", keyword, cora_inbox)
        return None

    # Scan once, newest first; remember the first prefix hit in case no
    # exact match turns up later in the list.
    newest_first = sorted(
        inbox.glob("*.xlsx"), key=lambda p: p.stat().st_mtime, reverse=True
    )
    prefix_hit = None
    for xlsx in newest_first:
        stem = xlsx.stem.lower()
        if stem == slug:
            return str(xlsx)
        if prefix_hit is None and stem.startswith(slug):
            prefix_hit = xlsx

    if prefix_hit is not None:
        return str(prefix_hit)

    log.warning("No xlsx matching '%s' in %s", keyword, cora_inbox)
    return None
def build_ingest_args(
    xlsx_path: str,
    project_name: str,
    money_site_url: str = "",
    branded_plus_ratio: str = "",
    custom_anchors: str = "",
    cli_flags: str = "",
) -> list[str]:
    """Assemble the argument list for the ``ingest-cora`` sub-command.

    Args:
        xlsx_path: Path of the Cora xlsx, passed as ``-f``.
        project_name: Project name, passed as ``-n``.
        money_site_url: Passed as ``-m`` when non-empty.
        branded_plus_ratio: Numeric string; passed as ``-bp`` only when it
            parses as a float different from 0.7 (presumably BLM's own
            default -- confirm against the BLM CLI). Unparseable values are
            ignored.
        custom_anchors: Passed as ``-a`` when non-empty.
        cli_flags: Extra flags, split on whitespace and appended verbatim.

    Returns:
        The argument list, starting with ``"ingest-cora"``.
    """
    argv = ["ingest-cora", "-f", xlsx_path, "-n", project_name]

    if money_site_url:
        argv += ["-m", money_site_url]

    if branded_plus_ratio:
        try:
            ratio = float(branded_plus_ratio)
        except (ValueError, TypeError):
            # Best effort: a malformed ratio simply leaves the flag out.
            ratio = None
        if ratio is not None and ratio != 0.7:
            argv += ["-bp", str(ratio)]

    if custom_anchors:
        argv += ["-a", custom_anchors]

    if cli_flags:
        # Whitespace split only; quoting inside cli_flags is not interpreted.
        argv += cli_flags.split()

    return argv
def parse_ingest_output(stdout: str) -> IngestResult:
    """Extract project and job-file details from ``ingest-cora`` stdout.

    Each line is stripped and tested against three patterns:

    * ``Success: Project '<name>' created (ID: <digits>)`` sets
      ``project_name``, ``project_id`` and marks the result successful.
    * ``Job file created: <path>`` sets ``job_file``.
    * ``Main Keyword: <text>`` sets ``main_keyword``.

    If a kind of line occurs more than once, the last occurrence wins.

    Args:
        stdout: Captured standard output of the command.

    Returns:
        An ``IngestResult``; ``success`` stays ``False`` unless the success
        line was seen.
    """
    # Lines that carry exactly one value, mapped to the attribute they fill.
    single_value_lines = (
        ("job_file", r"^Job file created: (.+)$"),
        ("main_keyword", r"^Main Keyword: (.+)$"),
    )

    parsed = IngestResult(success=False)
    for text in map(str.strip, stdout.splitlines()):
        created = re.match(r"^Success: Project '(.+)' created \(ID: (\d+)\)$", text)
        if created is not None:
            parsed.project_name = created.group(1)
            parsed.project_id = created.group(2)
            parsed.success = True
            continue

        for attribute, pattern in single_value_lines:
            hit = re.match(pattern, text)
            if hit is not None:
                setattr(parsed, attribute, hit.group(1).strip())
                break

    return parsed
def parse_generate_output(stdout: str) -> GenerateResult:
    """Extract the moved-job-file path from ``generate-batch`` stdout.

    Every stripped line matching ``Job file moved to: <path>`` counts; when
    several are present the last one is reported.

    Args:
        stdout: Captured standard output of the command.

    Returns:
        A successful ``GenerateResult`` carrying the path when at least one
        such line exists, otherwise an unsuccessful one with empty fields.
    """
    hits = (
        re.match(r"^Job file moved to: (.+)$", raw.strip())
        for raw in stdout.splitlines()
    )
    destinations = [hit.group(1).strip() for hit in hits if hit]

    if not destinations:
        return GenerateResult(success=False)
    return GenerateResult(success=True, job_moved_to=destinations[-1])
def run_ingest(
    xlsx_path: str,
    keyword: str,
    money_site_url: str,
    blm_dir: str,
    timeout: int = 1800,
    branded_plus_ratio: str = "",
    custom_anchors: str = "",
    cli_flags: str = "",
) -> IngestResult:
    """Run ``ingest-cora`` through BLM and report the parsed outcome.

    The keyword doubles as the BLM project name. Failures are never raised
    to the caller; they come back as an ``IngestResult`` with
    ``success=False`` and ``error`` describing what happened (timeout,
    missing venv/interpreter, non-zero exit code, or no job file in the
    output).

    Args:
        xlsx_path: Path of the Cora xlsx to ingest.
        keyword: Keyword, used as the project name.
        money_site_url: Money-site URL forwarded to the CLI.
        blm_dir: Root directory of the BLM checkout.
        timeout: Seconds to wait for the command.
        branded_plus_ratio: Optional ratio forwarded to the CLI.
        custom_anchors: Optional anchors forwarded to the CLI.
        cli_flags: Optional extra flags forwarded to the CLI.

    Returns:
        The parsed ``IngestResult`` when a job file was reported, otherwise
        a failure result.
    """

    def failure(message: str) -> IngestResult:
        return IngestResult(success=False, error=message)

    cli_args = build_ingest_args(
        xlsx_path,
        keyword,
        money_site_url=money_site_url,
        branded_plus_ratio=branded_plus_ratio,
        custom_anchors=custom_anchors,
        cli_flags=cli_flags,
    )

    try:
        completed = _run_blm(cli_args, blm_dir, timeout=timeout)
    except subprocess.TimeoutExpired:
        return failure("ingest-cora timed out after %d seconds" % timeout)
    except FileNotFoundError as exc:
        return failure(str(exc))

    if completed.returncode != 0:
        # Only the tail of each stream is kept to bound the message size.
        return failure(
            "ingest-cora failed (exit code %d).\nstdout: %s\nstderr: %s"
            % (completed.returncode, completed.stdout[-500:], completed.stderr[-500:])
        )

    outcome = parse_ingest_output(completed.stdout)
    if outcome.job_file:
        return outcome
    return failure(
        "ingest-cora produced no job file.\nstdout: %s" % completed.stdout[-500:]
    )
def run_generate(
    job_file: str,
    blm_dir: str,
    timeout: int = 1800,
) -> GenerateResult:
    """Run ``generate-batch`` through BLM and report the parsed outcome.

    Failures are never raised to the caller; they come back as a
    ``GenerateResult`` with ``success=False`` and ``error`` describing what
    happened (timeout, missing venv/interpreter, non-zero exit code, or no
    "Job file moved to" line in the output).

    Args:
        job_file: Job file path; a relative path is resolved against
            ``blm_dir``.
        blm_dir: Root directory of the BLM checkout.
        timeout: Seconds to wait for the command.

    Returns:
        The parsed ``GenerateResult`` on success, otherwise a failure result
        with a non-empty ``error``.
    """
    job_path = Path(job_file)
    if not job_path.is_absolute():
        job_path = Path(blm_dir) / job_path

    args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]

    try:
        proc = _run_blm(args, blm_dir, timeout=timeout)
    except subprocess.TimeoutExpired:
        return GenerateResult(
            success=False,
            error="generate-batch timed out after %d seconds" % timeout,
        )
    except FileNotFoundError as e:
        return GenerateResult(success=False, error=str(e))

    if proc.returncode != 0:
        return GenerateResult(
            success=False,
            error="generate-batch failed (exit code %d).\nstdout: %s\nstderr: %s"
            % (proc.returncode, proc.stdout[-500:], proc.stderr[-500:]),
        )

    parsed = parse_generate_output(proc.stdout)
    if not parsed.success:
        # Mirror run_ingest: a clean exit without the expected output line is
        # still a failure, and the caller needs to know why.
        return GenerateResult(
            success=False,
            error="generate-batch did not report a moved job file.\nstdout: %s"
            % proc.stdout[-500:],
        )
    return parsed