# Linkman-Paperclip-Wrap/tests/test_pipeline.py
#
# 461 lines
# 16 KiB
# Python

"""Tests for the three pipeline entry points.

BLM subprocess calls are mocked by patching
`link_building_workflow.blm.run_blm_command`. The pipeline module imports blm
as `blm_mod` and calls `blm_mod.run_blm_command(...)`, so the patch has to
target the attribute on the `blm` module itself.
"""
from __future__ import annotations
import subprocess
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from link_building_workflow import (
BLMConfig,
Deps,
blm_generate_batch,
blm_ingest_cora,
run_cora_backlinks,
)
@pytest.fixture()
def xlsx_file(tmp_path: Path) -> Path:
    """Provide a placeholder ``.xlsx`` file inside the per-test temp directory.

    The file holds dummy bytes rather than a real workbook; the tests in this
    module only need a path that exists on disk.
    """
    workbook = tmp_path.joinpath("precision-cnc-machining.xlsx")
    workbook.write_bytes(b"fake xlsx")
    return workbook
def _mock_proc(stdout: str = "", stderr: str = "", returncode: int = 0) -> MagicMock:
    """Build a stand-in for the object returned by the mocked BLM runner.

    Args:
        stdout: Text exposed as the mock's ``stdout`` attribute.
        stderr: Text exposed as the mock's ``stderr`` attribute.
        returncode: Exit status exposed as the mock's ``returncode`` attribute.

    Returns:
        A ``MagicMock`` with those three attributes set.
    """
    # Keyword arguments to MagicMock are applied as attributes on the new mock.
    return MagicMock(stdout=stdout, stderr=stderr, returncode=returncode)
# ---------------------------------------------------------------------------
# blm_ingest_cora
# ---------------------------------------------------------------------------
class TestBlmIngestCora:
    """Tests for `blm_ingest_cora` with the BLM command runner mocked out."""

    # Dotted path of the BLM runner that the tests in this class patch.
    _RUN_BLM = "link_building_workflow.blm.run_blm_command"

    def test_missing_xlsx_path(self, deps: Deps):
        """An empty `xlsx_path` gives a failed result naming the missing field."""
        outcome = blm_ingest_cora(xlsx_path="", project_name="P", deps=deps)

        assert outcome.ok is False
        assert "xlsx_path is required" in outcome.error

    def test_missing_project_name(self, deps: Deps, xlsx_file: Path):
        """An empty `project_name` gives a failed result naming the missing field."""
        outcome = blm_ingest_cora(
            xlsx_path=str(xlsx_file), project_name="", deps=deps
        )

        assert outcome.ok is False
        assert "project_name is required" in outcome.error

    def test_xlsx_not_found(self, deps: Deps):
        """A path that does not exist on disk is reported as 'not found'."""
        outcome = blm_ingest_cora(
            xlsx_path="/nope/missing.xlsx", project_name="P", deps=deps
        )

        assert outcome.ok is False
        assert "not found" in outcome.error

    def test_success(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """A zero-exit run with ingest stdout populates the parsed result fields."""
        completed = _mock_proc(stdout=ingest_success_stdout)

        with patch(self._RUN_BLM, return_value=completed):
            outcome = blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                deps=deps,
            )

        assert outcome.ok is True
        assert outcome.step == "ingest"
        assert outcome.ingest is not None
        assert outcome.ingest.project_id == "42"
        assert outcome.ingest.job_file == "jobs/test-project.json"
        assert outcome.job_file == "jobs/test-project.json"
        assert outcome.project_name == "Test Project"
        assert "CORA ingest complete" in outcome.summary

    def test_nonzero_exit_reports_failure(
        self, deps: Deps, xlsx_file: Path
    ):
        """A non-zero exit surfaces both the exit code and stderr in the error."""
        completed = _mock_proc(stdout="", stderr="boom", returncode=1)

        with patch(self._RUN_BLM, return_value=completed):
            outcome = blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
            )

        assert outcome.ok is False
        assert "exit=1" in outcome.error
        assert "boom" in outcome.error

    def test_timeout(self, deps: Deps, xlsx_file: Path):
        """A `TimeoutExpired` raised by the runner becomes a 'timed out' failure."""
        expired = subprocess.TimeoutExpired(cmd="python", timeout=300)

        with patch(self._RUN_BLM, side_effect=expired):
            outcome = blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
            )

        assert outcome.ok is False
        assert "timed out" in outcome.error

    def test_uses_config_default_branded_plus_ratio(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """Passing `branded_plus_ratio=None` must not add `-bp` to the command."""
        # Caller passes None, so Deps default (0.7) should be used
        completed = _mock_proc(stdout=ingest_success_stdout)

        with patch(self._RUN_BLM, return_value=completed) as mock_run:
            blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
                branded_plus_ratio=None,
            )

        argv = mock_run.call_args[0][0]
        # 0.7 is the default, so -bp should NOT appear in args
        assert "-bp" not in argv

    def test_caller_override_branded_plus_ratio(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """An explicit ratio is forwarded as `-bp <value>` in the command args."""
        completed = _mock_proc(stdout=ingest_success_stdout)

        with patch(self._RUN_BLM, return_value=completed) as mock_run:
            blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
                branded_plus_ratio=0.85,
            )

        argv = mock_run.call_args[0][0]
        assert "-bp" in argv
        # The ratio value is the argument immediately following the flag.
        flag_position = argv.index("-bp")
        assert argv[flag_position + 1] == "0.85"
# ---------------------------------------------------------------------------
# blm_generate_batch
# ---------------------------------------------------------------------------
class TestBlmGenerateBatch:
    """Tests for `blm_generate_batch` with the BLM command runner mocked out."""

    # Dotted path of the BLM runner that the tests in this class patch.
    _RUN_BLM = "link_building_workflow.blm.run_blm_command"

    @staticmethod
    def _empty_job(directory: Path) -> Path:
        """Write a `job.json` containing `{}` into *directory* and return its path."""
        job_path = directory / "job.json"
        job_path.write_text("{}")
        return job_path

    def test_missing_job_file_arg(self, deps: Deps):
        """An empty `job_file` gives a failed result naming the missing field."""
        outcome = blm_generate_batch(job_file="", deps=deps)

        assert outcome.ok is False
        assert "job_file is required" in outcome.error

    def test_job_file_does_not_exist(self, deps: Deps):
        """A job file path that is absent on disk is reported as 'not found'."""
        outcome = blm_generate_batch(
            job_file="/definitely/not/here.json", deps=deps
        )

        assert outcome.ok is False
        assert "not found" in outcome.error

    def test_relative_path_resolved_against_blm_dir(
        self, deps: Deps, generate_success_stdout: str
    ):
        """A relative job path is handed to BLM as an absolute path."""
        # Create a relative job file under the fake BLM dir
        relative_job = "jobs/x.json"
        blm_root = Path(deps.blm.blm_dir)
        (blm_root / "jobs").mkdir()
        (blm_root / relative_job).write_text("{}")
        completed = _mock_proc(stdout=generate_success_stdout)

        with patch(self._RUN_BLM, return_value=completed) as mock_run:
            outcome = blm_generate_batch(job_file=relative_job, deps=deps)

        assert outcome.ok is True
        # The resolved absolute path should have been passed to BLM
        argv = mock_run.call_args[0][0]
        forwarded = argv[argv.index("-j") + 1]
        assert forwarded.endswith("x.json")
        assert Path(forwarded).is_absolute()

    def test_continue_on_error_flag_default(
        self, deps: Deps, tmp_path: Path, generate_success_stdout: str
    ):
        """Without an override, `--continue-on-error` is part of the command."""
        job_path = self._empty_job(tmp_path)
        completed = _mock_proc(stdout=generate_success_stdout)

        with patch(self._RUN_BLM, return_value=completed) as mock_run:
            blm_generate_batch(job_file=str(job_path), deps=deps)

        argv = mock_run.call_args[0][0]
        assert "--continue-on-error" in argv

    def test_continue_on_error_disabled(
        self, deps: Deps, tmp_path: Path, generate_success_stdout: str
    ):
        """`continue_on_error=False` drops `--continue-on-error` from the command."""
        job_path = self._empty_job(tmp_path)
        completed = _mock_proc(stdout=generate_success_stdout)

        with patch(self._RUN_BLM, return_value=completed) as mock_run:
            blm_generate_batch(
                job_file=str(job_path), deps=deps, continue_on_error=False
            )

        argv = mock_run.call_args[0][0]
        assert "--continue-on-error" not in argv

    def test_debug_flag(self, deps: Deps, tmp_path: Path, generate_success_stdout: str):
        """`debug=True` adds `--debug` to the command."""
        job_path = self._empty_job(tmp_path)
        completed = _mock_proc(stdout=generate_success_stdout)

        with patch(self._RUN_BLM, return_value=completed) as mock_run:
            blm_generate_batch(job_file=str(job_path), deps=deps, debug=True)

        assert "--debug" in mock_run.call_args[0][0]

    def test_nonzero_exit(self, deps: Deps, tmp_path: Path):
        """A non-zero exit surfaces the exit code in the error."""
        job_path = self._empty_job(tmp_path)
        completed = _mock_proc(stdout="", stderr="fail", returncode=2)

        with patch(self._RUN_BLM, return_value=completed):
            outcome = blm_generate_batch(job_file=str(job_path), deps=deps)

        assert outcome.ok is False
        assert "exit=2" in outcome.error

    def test_timeout(self, deps: Deps, tmp_path: Path):
        """A `TimeoutExpired` raised by the runner becomes a 'timed out' failure."""
        job_path = self._empty_job(tmp_path)
        expired = subprocess.TimeoutExpired(cmd="python", timeout=300)

        with patch(self._RUN_BLM, side_effect=expired):
            outcome = blm_generate_batch(job_file=str(job_path), deps=deps)

        assert outcome.ok is False
        assert "timed out" in outcome.error
# ---------------------------------------------------------------------------
# run_cora_backlinks (full pipeline)
# ---------------------------------------------------------------------------
class TestRunCoraBacklinks:
    """Tests for `run_cora_backlinks`, the ingest-then-generate pipeline."""

    # Dotted path of the BLM runner that the tests in this class patch.
    _RUN_BLM = "link_building_workflow.blm.run_blm_command"

    @staticmethod
    def _seed_job_file(blm_dir) -> None:
        """Create `jobs/test-project.json` (containing `{}`) under *blm_dir*.

        The ingest stdout must reference a job file that then exists on disk
        for blm_generate_batch's existence check to pass.
        """
        root = Path(blm_dir)
        (root / "jobs").mkdir()
        (root / "jobs/test-project.json").write_text("{}")

    @staticmethod
    def _two_step_procs(ingest_stdout: str, generate_stdout: str) -> list:
        """Return the mocked results for the ingest call and the generate call."""
        return [
            _mock_proc(stdout=ingest_stdout),
            _mock_proc(stdout=generate_stdout),
        ]

    @staticmethod
    def _pipeline_kwargs(xlsx_file: Path, deps: Deps) -> dict:
        """Keyword arguments shared by the 'Test Project' pipeline invocations."""
        return {
            "xlsx_path": str(xlsx_file),
            "project_name": "Test Project",
            "money_site_url": "https://example.com",
            "deps": deps,
        }

    def test_missing_money_site_url(self, deps: Deps, xlsx_file: Path):
        """An empty `money_site_url` gives a failed result mentioning IMSURL."""
        outcome = run_cora_backlinks(
            xlsx_path=str(xlsx_file),
            project_name="P",
            money_site_url="",
            deps=deps,
        )

        assert outcome.ok is False
        assert "IMSURL" in outcome.error

    def test_full_success(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """Both steps succeed: result is complete and BLM ran ingest then generate."""
        self._seed_job_file(deps.blm.blm_dir)
        responses = self._two_step_procs(
            ingest_success_stdout, generate_success_stdout
        )

        with patch(self._RUN_BLM, side_effect=responses) as mock_run:
            outcome = run_cora_backlinks(**self._pipeline_kwargs(xlsx_file, deps))

        assert outcome.ok is True
        assert outcome.step == "complete"
        assert outcome.ingest is not None
        assert outcome.generate is not None
        assert outcome.ingest.project_id == "42"
        assert outcome.generate.job_moved_to == "jobs/done/test-project.json"
        assert outcome.job_file == "jobs/done/test-project.json"
        assert "Step 1" in outcome.summary and "Step 2" in outcome.summary
        # BLM was invoked twice (ingest, generate)
        assert mock_run.call_count == 2
        first_call, second_call = mock_run.call_args_list
        assert "ingest-cora" in first_call[0][0]
        assert "generate-batch" in second_call[0][0]

    def test_ingest_failure_skips_generate(
        self, deps: Deps, xlsx_file: Path
    ):
        """A failed ingest stops the pipeline before generate is invoked."""
        responses = [_mock_proc(stdout="", stderr="fail", returncode=1)]

        with patch(self._RUN_BLM, side_effect=responses) as mock_run:
            outcome = run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="P",
                money_site_url="https://example.com",
                deps=deps,
            )

        assert outcome.ok is False
        assert outcome.step == "ingest"
        assert mock_run.call_count == 1  # generate not called

    def test_generate_failure_preserves_ingest(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """A failed generate step still leaves the ingest data on the result."""
        self._seed_job_file(deps.blm.blm_dir)
        responses = [
            _mock_proc(stdout=ingest_success_stdout),
            _mock_proc(stdout="", stderr="gen fail", returncode=3),
        ]

        with patch(self._RUN_BLM, side_effect=responses):
            outcome = run_cora_backlinks(**self._pipeline_kwargs(xlsx_file, deps))

        assert outcome.ok is False
        assert outcome.step == "generate"
        # Ingest succeeded; its data is still on the result
        assert outcome.ingest is not None
        assert outcome.ingest.project_id == "42"
        assert "gen fail" in outcome.error

    def test_on_progress_callback_invoked(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """`on_progress` receives a message per step, mirrored in `log_lines`."""
        self._seed_job_file(deps.blm.blm_dir)
        received: list[str] = []
        responses = self._two_step_procs(
            ingest_success_stdout, generate_success_stdout
        )

        with patch(self._RUN_BLM, side_effect=responses):
            outcome = run_cora_backlinks(
                on_progress=received.append,
                **self._pipeline_kwargs(xlsx_file, deps),
            )

        assert outcome.ok is True
        assert len(received) >= 2
        assert any("Step 1" in message for message in received)
        assert any("Step 2" in message for message in received)
        # log_lines mirrors progress_calls
        assert outcome.log_lines == received

    def test_on_progress_exception_does_not_break_pipeline(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """A raising `on_progress` callback must not fail the pipeline."""
        self._seed_job_file(deps.blm.blm_dir)

        def broken(_msg: str) -> None:
            raise RuntimeError("progress callback failed")

        responses = self._two_step_procs(
            ingest_success_stdout, generate_success_stdout
        )

        with patch(self._RUN_BLM, side_effect=responses):
            outcome = run_cora_backlinks(
                on_progress=broken,
                **self._pipeline_kwargs(xlsx_file, deps),
            )

        # Pipeline still completed successfully despite broken callback
        assert outcome.ok is True

    def test_uses_config_default_ratio_when_none(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """With `branded_plus_ratio=None`, the config's 0.9 is sent as `-bp 0.9`."""
        # Verify the Deps-level default flows into build_ingest_args
        base_cfg = deps.blm
        custom_cfg = BLMConfig(
            blm_dir=base_cfg.blm_dir,
            username=base_cfg.username,
            password=base_cfg.password,
            timeout_seconds=base_cfg.timeout_seconds,
            default_branded_plus_ratio=0.9,  # non-default
            python_exe=base_cfg.python_exe,
        )
        custom_deps = Deps(blm=custom_cfg, llm_check=deps.llm_check)
        self._seed_job_file(custom_cfg.blm_dir)
        responses = self._two_step_procs(
            ingest_success_stdout, generate_success_stdout
        )

        with patch(self._RUN_BLM, side_effect=responses) as mock_run:
            run_cora_backlinks(
                branded_plus_ratio=None,  # should pick up 0.9 default
                **self._pipeline_kwargs(xlsx_file, custom_deps),
            )

        ingest_argv = mock_run.call_args_list[0][0][0]
        assert "-bp" in ingest_argv
        assert ingest_argv[ingest_argv.index("-bp") + 1] == "0.9"