461 lines
16 KiB
Python
461 lines
16 KiB
Python
"""Tests for the three pipeline entry points.
|
|
|
|
BLM subprocess calls are mocked via `link_building_workflow.blm.run_blm_command`.
|
|
The pipeline module imports blm as `blm_mod` and calls `blm_mod.run_blm_command(...)`,
|
|
so we patch there.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from link_building_workflow import (
|
|
BLMConfig,
|
|
Deps,
|
|
blm_generate_batch,
|
|
blm_ingest_cora,
|
|
run_cora_backlinks,
|
|
)
|
|
|
|
|
|
@pytest.fixture()
def xlsx_file(tmp_path: Path) -> Path:
    """Create a placeholder ``.xlsx`` file under ``tmp_path`` and return its path.

    The content is not a real workbook; the file only has to exist on disk
    for the tests that pass its path to the pipeline entry points.
    """
    p = tmp_path / "precision-cnc-machining.xlsx"
    p.write_bytes(b"fake xlsx")
    return p
|
|
|
|
|
|
def _mock_proc(stdout: str = "", stderr: str = "", returncode: int = 0) -> MagicMock:
|
|
m = MagicMock()
|
|
m.stdout = stdout
|
|
m.stderr = stderr
|
|
m.returncode = returncode
|
|
return m
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# blm_ingest_cora
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestBlmIngestCora:
    """Tests for ``blm_ingest_cora`` with the BLM runner patched out.

    The ``deps`` and ``ingest_success_stdout`` fixtures are not defined in
    this file (presumably they live in a shared ``conftest.py``).
    """

    def test_missing_xlsx_path(self, deps: Deps):
        """An empty ``xlsx_path`` yields a failed result naming the argument."""
        result = blm_ingest_cora(xlsx_path="", project_name="P", deps=deps)
        assert result.ok is False
        assert "xlsx_path is required" in result.error

    def test_missing_project_name(self, deps: Deps, xlsx_file: Path):
        """An empty ``project_name`` yields a failed result naming the argument."""
        result = blm_ingest_cora(
            xlsx_path=str(xlsx_file), project_name="", deps=deps
        )
        assert result.ok is False
        assert "project_name is required" in result.error

    def test_xlsx_not_found(self, deps: Deps):
        """A path that does not exist on disk is reported as not found."""
        result = blm_ingest_cora(
            xlsx_path="/nope/missing.xlsx", project_name="P", deps=deps
        )
        assert result.ok is False
        assert "not found" in result.error

    def test_success(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """A zero-exit run populates the ingest data, job file and summary."""
        proc = _mock_proc(stdout=ingest_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ):
            result = blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                deps=deps,
            )
        assert result.ok is True
        assert result.step == "ingest"
        assert result.ingest is not None
        assert result.ingest.project_id == "42"
        assert result.ingest.job_file == "jobs/test-project.json"
        assert result.job_file == "jobs/test-project.json"
        assert result.project_name == "Test Project"
        assert "CORA ingest complete" in result.summary

    def test_nonzero_exit_reports_failure(
        self, deps: Deps, xlsx_file: Path
    ):
        """A non-zero exit surfaces both the exit code and stderr in the error."""
        proc = _mock_proc(stdout="", stderr="boom", returncode=1)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ):
            result = blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
            )
        assert result.ok is False
        assert "exit=1" in result.error
        assert "boom" in result.error

    def test_timeout(self, deps: Deps, xlsx_file: Path):
        """A ``TimeoutExpired`` from the runner becomes a "timed out" failure."""
        with patch(
            "link_building_workflow.blm.run_blm_command",
            side_effect=subprocess.TimeoutExpired(cmd="python", timeout=300),
        ):
            result = blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
            )
        assert result.ok is False
        assert "timed out" in result.error

    def test_uses_config_default_branded_plus_ratio(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """Passing ``branded_plus_ratio=None`` omits ``-bp`` from the command."""
        # Caller passes None, so Deps default (0.7) should be used
        proc = _mock_proc(stdout=ingest_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ) as mock_run:
            blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
                branded_plus_ratio=None,
            )
        # First positional argument of the patched runner is the arg list.
        args = mock_run.call_args[0][0]
        # 0.7 is the default, so -bp should NOT appear in args
        assert "-bp" not in args

    def test_caller_override_branded_plus_ratio(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """An explicit ratio is forwarded as ``-bp <value>``."""
        proc = _mock_proc(stdout=ingest_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ) as mock_run:
            blm_ingest_cora(
                xlsx_path=str(xlsx_file),
                project_name="P",
                deps=deps,
                branded_plus_ratio=0.85,
            )
        args = mock_run.call_args[0][0]
        assert "-bp" in args
        assert args[args.index("-bp") + 1] == "0.85"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# blm_generate_batch
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestBlmGenerateBatch:
    """Tests for ``blm_generate_batch`` with the BLM runner patched out.

    The ``deps`` and ``generate_success_stdout`` fixtures are not defined in
    this file (presumably they live in a shared ``conftest.py``).
    """

    def test_missing_job_file_arg(self, deps: Deps):
        """An empty ``job_file`` yields a failed result naming the argument."""
        result = blm_generate_batch(job_file="", deps=deps)
        assert result.ok is False
        assert "job_file is required" in result.error

    def test_job_file_does_not_exist(self, deps: Deps):
        """A job file path that is absent on disk is reported as not found."""
        result = blm_generate_batch(
            job_file="/definitely/not/here.json", deps=deps
        )
        assert result.ok is False
        assert "not found" in result.error

    def test_relative_path_resolved_against_blm_dir(
        self, deps: Deps, generate_success_stdout: str
    ):
        """A relative job path is passed to BLM as an absolute path."""
        # Create a relative job file under the fake BLM dir
        job_rel = "jobs/x.json"
        job_path = Path(deps.blm.blm_dir) / job_rel
        # exist_ok keeps the setup from failing if the directory is already there.
        job_path.parent.mkdir(parents=True, exist_ok=True)
        job_path.write_text("{}")

        proc = _mock_proc(stdout=generate_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ) as mock_run:
            result = blm_generate_batch(job_file=job_rel, deps=deps)
        assert result.ok is True

        # The resolved absolute path should have been passed to BLM
        args = mock_run.call_args[0][0]
        j_index = args.index("-j")
        passed_path = args[j_index + 1]
        assert passed_path.endswith("x.json")
        assert Path(passed_path).is_absolute()

    def test_continue_on_error_flag_default(
        self, deps: Deps, tmp_path: Path, generate_success_stdout: str
    ):
        """``--continue-on-error`` is sent when the caller does not opt out."""
        job = tmp_path / "job.json"
        job.write_text("{}")

        proc = _mock_proc(stdout=generate_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ) as mock_run:
            blm_generate_batch(job_file=str(job), deps=deps)
        args = mock_run.call_args[0][0]
        assert "--continue-on-error" in args

    def test_continue_on_error_disabled(
        self, deps: Deps, tmp_path: Path, generate_success_stdout: str
    ):
        """``continue_on_error=False`` removes the flag from the command."""
        job = tmp_path / "job.json"
        job.write_text("{}")

        proc = _mock_proc(stdout=generate_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ) as mock_run:
            blm_generate_batch(
                job_file=str(job), deps=deps, continue_on_error=False
            )
        args = mock_run.call_args[0][0]
        assert "--continue-on-error" not in args

    def test_debug_flag(
        self, deps: Deps, tmp_path: Path, generate_success_stdout: str
    ):
        """``debug=True`` adds ``--debug`` to the command."""
        job = tmp_path / "job.json"
        job.write_text("{}")
        proc = _mock_proc(stdout=generate_success_stdout)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ) as mock_run:
            blm_generate_batch(job_file=str(job), deps=deps, debug=True)
        assert "--debug" in mock_run.call_args[0][0]

    def test_nonzero_exit(self, deps: Deps, tmp_path: Path):
        """A non-zero exit code is reported in the error text."""
        job = tmp_path / "job.json"
        job.write_text("{}")

        proc = _mock_proc(stdout="", stderr="fail", returncode=2)
        with patch(
            "link_building_workflow.blm.run_blm_command", return_value=proc
        ):
            result = blm_generate_batch(job_file=str(job), deps=deps)
        assert result.ok is False
        assert "exit=2" in result.error

    def test_timeout(self, deps: Deps, tmp_path: Path):
        """A ``TimeoutExpired`` from the runner becomes a "timed out" failure."""
        job = tmp_path / "job.json"
        job.write_text("{}")
        with patch(
            "link_building_workflow.blm.run_blm_command",
            side_effect=subprocess.TimeoutExpired(cmd="python", timeout=300),
        ):
            result = blm_generate_batch(job_file=str(job), deps=deps)
        assert result.ok is False
        assert "timed out" in result.error
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# run_cora_backlinks (full pipeline)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestRunCoraBacklinks:
    """Tests for the full ``run_cora_backlinks`` pipeline (ingest + generate).

    The BLM runner is patched with a ``side_effect`` list so that successive
    calls return the ingest result first and the generate result second.
    The ``deps``, ``ingest_success_stdout`` and ``generate_success_stdout``
    fixtures are not defined in this file (presumably a shared ``conftest.py``).
    """

    @staticmethod
    def _seed_job_file(
        blm_dir: str | Path, job_rel: str = "jobs/test-project.json"
    ) -> Path:
        """Write an empty JSON job file at ``blm_dir / job_rel`` and return it.

        The ingest stdout must reference a job file that then exists on disk
        for blm_generate_batch's existence check to pass.
        """
        job_path = Path(blm_dir) / job_rel
        # exist_ok keeps the setup from failing if the directory is already there.
        job_path.parent.mkdir(parents=True, exist_ok=True)
        job_path.write_text("{}")
        return job_path

    def test_missing_money_site_url(self, deps: Deps, xlsx_file: Path):
        """An empty ``money_site_url`` fails with an error mentioning IMSURL."""
        result = run_cora_backlinks(
            xlsx_path=str(xlsx_file),
            project_name="P",
            money_site_url="",
            deps=deps,
        )
        assert result.ok is False
        assert "IMSURL" in result.error

    def test_full_success(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """Both steps succeed: result is complete and BLM was called twice."""
        self._seed_job_file(deps.blm.blm_dir)

        procs = [
            _mock_proc(stdout=ingest_success_stdout),
            _mock_proc(stdout=generate_success_stdout),
        ]
        with patch(
            "link_building_workflow.blm.run_blm_command", side_effect=procs
        ) as mock_run:
            result = run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                money_site_url="https://example.com",
                deps=deps,
            )

        assert result.ok is True
        assert result.step == "complete"
        assert result.ingest is not None
        assert result.generate is not None
        assert result.ingest.project_id == "42"
        assert result.generate.job_moved_to == "jobs/done/test-project.json"
        assert result.job_file == "jobs/done/test-project.json"
        assert "Step 1" in result.summary and "Step 2" in result.summary

        # BLM was invoked twice (ingest, generate)
        assert mock_run.call_count == 2
        ingest_args = mock_run.call_args_list[0][0][0]
        generate_args = mock_run.call_args_list[1][0][0]
        assert "ingest-cora" in ingest_args
        assert "generate-batch" in generate_args

    def test_ingest_failure_skips_generate(
        self, deps: Deps, xlsx_file: Path
    ):
        """A failed ingest stops the pipeline before generate is invoked."""
        procs = [_mock_proc(stdout="", stderr="fail", returncode=1)]
        with patch(
            "link_building_workflow.blm.run_blm_command", side_effect=procs
        ) as mock_run:
            result = run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="P",
                money_site_url="https://example.com",
                deps=deps,
            )
        assert result.ok is False
        assert result.step == "ingest"
        assert mock_run.call_count == 1  # generate not called

    def test_generate_failure_preserves_ingest(
        self, deps: Deps, xlsx_file: Path, ingest_success_stdout: str
    ):
        """A failed generate step still leaves the ingest data on the result."""
        self._seed_job_file(deps.blm.blm_dir)

        procs = [
            _mock_proc(stdout=ingest_success_stdout),
            _mock_proc(stdout="", stderr="gen fail", returncode=3),
        ]
        with patch(
            "link_building_workflow.blm.run_blm_command", side_effect=procs
        ):
            result = run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                money_site_url="https://example.com",
                deps=deps,
            )
        assert result.ok is False
        assert result.step == "generate"
        # Ingest succeeded; its data is still on the result
        assert result.ingest is not None
        assert result.ingest.project_id == "42"
        assert "gen fail" in result.error

    def test_on_progress_callback_invoked(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """``on_progress`` receives messages for both steps, mirrored in log_lines."""
        self._seed_job_file(deps.blm.blm_dir)

        progress_calls: list[str] = []
        procs = [
            _mock_proc(stdout=ingest_success_stdout),
            _mock_proc(stdout=generate_success_stdout),
        ]
        with patch(
            "link_building_workflow.blm.run_blm_command", side_effect=procs
        ):
            result = run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                money_site_url="https://example.com",
                deps=deps,
                on_progress=progress_calls.append,
            )
        assert result.ok is True
        assert len(progress_calls) >= 2
        assert any("Step 1" in m for m in progress_calls)
        assert any("Step 2" in m for m in progress_calls)
        # log_lines mirrors progress_calls
        assert result.log_lines == progress_calls

    def test_on_progress_exception_does_not_break_pipeline(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """A callback that raises must not cause the pipeline to fail."""
        self._seed_job_file(deps.blm.blm_dir)

        def broken(_msg: str) -> None:
            raise RuntimeError("progress callback failed")

        procs = [
            _mock_proc(stdout=ingest_success_stdout),
            _mock_proc(stdout=generate_success_stdout),
        ]
        with patch(
            "link_building_workflow.blm.run_blm_command", side_effect=procs
        ):
            result = run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                money_site_url="https://example.com",
                deps=deps,
                on_progress=broken,
            )
        # Pipeline still completed successfully despite broken callback
        assert result.ok is True

    def test_uses_config_default_ratio_when_none(
        self,
        deps: Deps,
        xlsx_file: Path,
        ingest_success_stdout: str,
        generate_success_stdout: str,
    ):
        """With ``branded_plus_ratio=None`` the config's 0.9 is sent as ``-bp``."""
        # Verify the Deps-level default flows into build_ingest_args
        blm_cfg = BLMConfig(
            blm_dir=deps.blm.blm_dir,
            username=deps.blm.username,
            password=deps.blm.password,
            timeout_seconds=deps.blm.timeout_seconds,
            default_branded_plus_ratio=0.9,  # non-default
            python_exe=deps.blm.python_exe,
        )
        new_deps = Deps(blm=blm_cfg, llm_check=deps.llm_check)

        self._seed_job_file(blm_cfg.blm_dir)

        procs = [
            _mock_proc(stdout=ingest_success_stdout),
            _mock_proc(stdout=generate_success_stdout),
        ]
        with patch(
            "link_building_workflow.blm.run_blm_command", side_effect=procs
        ) as mock_run:
            run_cora_backlinks(
                xlsx_path=str(xlsx_file),
                project_name="Test Project",
                money_site_url="https://example.com",
                deps=new_deps,
                branded_plus_ratio=None,  # should pick up 0.9 default
            )
        ingest_args = mock_run.call_args_list[0][0][0]
        assert "-bp" in ingest_args
        assert ingest_args[ingest_args.index("-bp") + 1] == "0.9"
|