"""Big-Link-Man CLI runner. Runs ingest-cora and generate-batch via BLM's own venv Python. Ported from cheddahbot/tools/linkbuilding.py for headless use. """ from __future__ import annotations import logging import os import re import subprocess from dataclasses import dataclass from pathlib import Path log = logging.getLogger(__name__) @dataclass class IngestResult: """Parsed output from ingest-cora.""" success: bool project_id: str = "" project_name: str = "" main_keyword: str = "" job_file: str = "" error: str = "" @dataclass class GenerateResult: """Parsed output from generate-batch.""" success: bool job_moved_to: str = "" error: str = "" def _resolve_venv_python(blm_dir: str) -> Path: """Find BLM's venv Python executable.""" venv_python = Path(blm_dir) / ".venv" / "Scripts" / "python.exe" if not venv_python.exists(): # Fallback for Linux/Mac venv_python = Path(blm_dir) / ".venv" / "bin" / "python" if not venv_python.exists(): raise FileNotFoundError( "No .venv found in %s. BLM must have its own venv." % blm_dir ) return venv_python def _run_blm( args: list[str], blm_dir: str, timeout: int = 1800 ) -> subprocess.CompletedProcess: """Run a BLM CLI command with credential injection.""" venv_python = _resolve_venv_python(blm_dir) cmd = [str(venv_python), "main.py"] + args # Inject credentials from env vars username = os.getenv("BLM_USERNAME", "") password = os.getenv("BLM_PASSWORD", "") if username and "-u" not in args and "--username" not in args: cmd.extend(["-u", username]) if password and "-p" not in args and "--password" not in args: cmd.extend(["-p", password]) log.info("BLM command: %s (cwd=%s)", " ".join(cmd), blm_dir) result = subprocess.run( cmd, cwd=blm_dir, capture_output=True, text=True, timeout=timeout, ) log.info("BLM exit code: %d", result.returncode) if result.stdout: log.debug("BLM stdout: %s", result.stdout[:2000]) if result.stderr: log.debug("BLM stderr: %s", result.stderr[:2000]) return result def find_cora_xlsx(keyword: str, cora_inbox: str) -> str | None: """Find the Cora xlsx in the inbox directory by keyword match. Looks for files whose name (slugified) matches the keyword. Returns the full path or None. """ inbox = Path(cora_inbox) if not inbox.exists(): log.warning("Cora inbox not found: %s", cora_inbox) return None # Slugify keyword for matching: lowercase, spaces -> underscores slug = keyword.lower().strip().replace(" ", "_") slug = re.sub(r"[^a-z0-9_]", "", slug) # Look for exact match first, then prefix match for xlsx in sorted(inbox.glob("*.xlsx"), key=lambda p: p.stat().st_mtime, reverse=True): name_lower = xlsx.stem.lower() if name_lower == slug: return str(xlsx) # Prefix match (keyword slug is prefix of filename) for xlsx in sorted(inbox.glob("*.xlsx"), key=lambda p: p.stat().st_mtime, reverse=True): name_lower = xlsx.stem.lower() if name_lower.startswith(slug): return str(xlsx) log.warning("No xlsx matching '%s' in %s", keyword, cora_inbox) return None def build_ingest_args( xlsx_path: str, project_name: str, money_site_url: str = "", branded_plus_ratio: str = "", custom_anchors: str = "", cli_flags: str = "", ) -> list[str]: """Build the ingest-cora CLI argument list.""" args = ["ingest-cora", "-f", xlsx_path, "-n", project_name] if money_site_url: args.extend(["-m", money_site_url]) if branded_plus_ratio: try: bp = float(branded_plus_ratio) if bp != 0.7: args.extend(["-bp", str(bp)]) except (ValueError, TypeError): pass if custom_anchors: args.extend(["-a", custom_anchors]) if cli_flags: args.extend(cli_flags.strip().split()) return args def parse_ingest_output(stdout: str) -> IngestResult: """Parse ingest-cora stdout.""" result = IngestResult(success=False) for line in stdout.splitlines(): line = line.strip() m = re.match(r"^Success: Project '(.+)' created \(ID: (\d+)\)$", line) if m: result.project_name = m.group(1) result.project_id = m.group(2) result.success = True continue m = re.match(r"^Job file created: (.+)$", line) if m: result.job_file = m.group(1).strip() continue m = re.match(r"^Main Keyword: (.+)$", line) if m: result.main_keyword = m.group(1).strip() continue return result def parse_generate_output(stdout: str) -> GenerateResult: """Parse generate-batch stdout.""" result = GenerateResult(success=False) for line in stdout.splitlines(): line = line.strip() m = re.match(r"^Job file moved to: (.+)$", line) if m: result.job_moved_to = m.group(1).strip() result.success = True continue return result def run_ingest( xlsx_path: str, keyword: str, money_site_url: str, blm_dir: str, timeout: int = 1800, branded_plus_ratio: str = "", custom_anchors: str = "", cli_flags: str = "", ) -> IngestResult: """Run ingest-cora and return parsed result.""" args = build_ingest_args( xlsx_path=xlsx_path, project_name=keyword, money_site_url=money_site_url, branded_plus_ratio=branded_plus_ratio, custom_anchors=custom_anchors, cli_flags=cli_flags, ) try: proc = _run_blm(args, blm_dir, timeout=timeout) except subprocess.TimeoutExpired: return IngestResult( success=False, error="ingest-cora timed out after %d seconds" % timeout, ) except FileNotFoundError as e: return IngestResult(success=False, error=str(e)) if proc.returncode != 0: return IngestResult( success=False, error="ingest-cora failed (exit code %d).\nstdout: %s\nstderr: %s" % (proc.returncode, proc.stdout[-500:], proc.stderr[-500:]), ) parsed = parse_ingest_output(proc.stdout) if not parsed.job_file: return IngestResult( success=False, error="ingest-cora produced no job file.\nstdout: %s" % proc.stdout[-500:], ) return parsed def run_generate( job_file: str, blm_dir: str, timeout: int = 1800, ) -> GenerateResult: """Run generate-batch and return parsed result.""" job_path = Path(blm_dir) / job_file if not Path(job_file).is_absolute() else Path(job_file) args = ["generate-batch", "-j", str(job_path), "--continue-on-error"] try: proc = _run_blm(args, blm_dir, timeout=timeout) except subprocess.TimeoutExpired: return GenerateResult( success=False, error="generate-batch timed out after %d seconds" % timeout, ) except FileNotFoundError as e: return GenerateResult(success=False, error=str(e)) if proc.returncode != 0: return GenerateResult( success=False, error="generate-batch failed (exit code %d).\nstdout: %s\nstderr: %s" % (proc.returncode, proc.stdout[-500:], proc.stderr[-500:]), ) return parse_generate_output(proc.stdout)