# Source listing: CheddahBot/tests/test_linkbuilding.py (617 lines, 22 KiB, Python)

"""Tests for the link building pipeline tools."""
from __future__ import annotations
import json
import subprocess
from unittest.mock import MagicMock, patch
import pytest
from cheddahbot.tools.linkbuilding import (
_build_ingest_args,
_fuzzy_keyword_match,
_normalize_for_match,
_parse_generate_output,
_parse_ingest_output,
blm_generate_batch,
blm_ingest_cora,
run_cora_backlinks,
run_link_building,
scan_cora_folder,
)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def mock_config():
    """Build a ``MagicMock`` standing in for the application config.

    Only the ``link_building`` and ``clickup`` sections are given explicit
    values; any other attribute access falls back to ``MagicMock``'s
    auto-created children.
    """
    link_building_values = {
        "blm_dir": "/fake/blm",
        "watch_folder": "",
        "watch_interval_minutes": 60,
        "default_branded_plus_ratio": 0.7,
    }
    clickup_values = {
        "enabled": False,
        "api_token": "",
        "workspace_id": "",
        "space_id": "",
        "in_progress_status": "in progress",
        "task_type_field_name": "Work Category",
        "skill_map": {},
    }

    cfg = MagicMock()
    for attr_name, attr_value in link_building_values.items():
        setattr(cfg.link_building, attr_name, attr_value)
    for attr_name, attr_value in clickup_values.items():
        setattr(cfg.clickup, attr_name, attr_value)
    return cfg
@pytest.fixture()
def mock_ctx(tmp_db, mock_config):
    """Assemble the tool context mapping passed to the tools as ``ctx``.

    The mapping holds the mocked config under ``"config"`` and the ``tmp_db``
    fixture under ``"db"``. ``tmp_db`` is not defined in this file
    (presumably provided by a conftest -- verify).
    """
    ctx = dict(config=mock_config, db=tmp_db)
    return ctx
@pytest.fixture()
def ingest_success_stdout():
    """Return sample stdout text for a successful ``ingest-cora`` run."""
    output_lines = (
        "Authenticated as: testuser (User)\n",
        "\n",
        "Parsing CORA file: /tmp/test.xlsx\n",
        "Main Keyword: precision cnc machining\n",
        "Word Count: 1500\n",
        "Entities Found: 12\n",
        "Related Searches: 8\n",
        "\n",
        "Creating project: Test Project\n",
        "Money Site URL: https://example.com\n",
        "\n",
        "Success: Project 'Test Project' created (ID: 42)\n",
        "Main Keyword: precision cnc machining\n",
        "Money Site URL: https://example.com\n",
        "Entities: 12\n",
        "Related Searches: 8\n",
        "Job file created: jobs/test-project.json\n",
    )
    return "".join(output_lines)
@pytest.fixture()
def generate_success_stdout():
    """Return sample stdout text for a successful ``generate-batch`` run."""
    output_lines = (
        "Authenticated as: testuser (User)\n",
        "Initializing AI client with default model: gpt-4o-mini\n",
        "\n",
        "Processing job file: jobs/test-project.json\n",
        "Concurrent workers: 3\n",
        "\n",
        "Job file moved to: jobs/done/test-project.json\n",
    )
    return "".join(output_lines)
# ---------------------------------------------------------------------------
# Output parser tests
# ---------------------------------------------------------------------------
class TestParseIngestOutput:
    """Checks for ``_parse_ingest_output`` against sample ingest-cora stdout."""

    def test_parses_success(self, ingest_success_stdout):
        """A full successful run yields all four parsed fields."""
        parsed = _parse_ingest_output(ingest_success_stdout)
        expected_fields = {
            "project_id": "42",
            "project_name": "Test Project",
            "job_file": "jobs/test-project.json",
            "main_keyword": "precision cnc machining",
        }
        for field, expected in expected_fields.items():
            assert parsed[field] == expected

    def test_empty_stdout(self):
        """Empty input yields empty strings for every field."""
        parsed = _parse_ingest_output("")
        for field in ("project_id", "job_file", "project_name", "main_keyword"):
            assert parsed[field] == ""

    def test_partial_output_no_job_file(self):
        """A success line without a job-file line leaves ``job_file`` empty."""
        parsed = _parse_ingest_output(
            "Success: Project 'My Project' created (ID: 99)\n"
        )
        assert parsed["project_id"] == "99"
        assert parsed["project_name"] == "My Project"
        assert parsed["job_file"] == ""

    def test_error_output(self):
        """An error line yields neither a project id nor a job file."""
        parsed = _parse_ingest_output("Error: Authentication failed\n")
        assert parsed["project_id"] == ""
        assert parsed["job_file"] == ""

    def test_project_with_special_chars(self):
        """Quotes and parentheses in the project name must not crash parsing."""
        raw_output = (
            "Success: Project 'O'Brien & Sons (LLC)'"
            " created (ID: 7)\n"
            "Job file created: jobs/obrien.json\n"
        )
        parsed = _parse_ingest_output(raw_output)
        # The project-name regex may not cope with the embedded quote; only
        # the job-file line is asserted so the test just proves no crash.
        assert parsed["job_file"] == "jobs/obrien.json"

    def test_job_file_with_date_suffix(self):
        """Job file names that carry a numeric date suffix are kept intact."""
        parsed = _parse_ingest_output(
            "Job file created: jobs/my-project-260219.json\n"
        )
        assert parsed["job_file"] == "jobs/my-project-260219.json"
class TestParseGenerateOutput:
    """Checks for ``_parse_generate_output`` against sample generate-batch stdout."""

    def test_parses_success(self, generate_success_stdout):
        """A run that reports the moved job file is flagged as successful."""
        parsed = _parse_generate_output(generate_success_stdout)
        assert parsed["success"] is True
        assert parsed["job_moved_to"] == "jobs/done/test-project.json"

    def test_empty_stdout(self):
        """Empty input is not a success and has no moved-job path."""
        parsed = _parse_generate_output("")
        assert parsed["success"] is False
        assert parsed["job_moved_to"] == ""

    def test_no_job_moved_line(self):
        """Without a 'Job file moved to' line the run counts as failed."""
        raw_output = "Authenticated as: testuser (User)\nProcessing...\n"
        parsed = _parse_generate_output(raw_output)
        assert parsed["success"] is False
        # The untouched stdout is echoed back under "raw_output".
        assert parsed["raw_output"] == raw_output
# ---------------------------------------------------------------------------
# CLI arg builder tests
# ---------------------------------------------------------------------------
class TestBuildIngestArgs:
    """Checks for the CLI argument list produced by ``_build_ingest_args``."""

    @staticmethod
    def _value_after(argv, flag):
        """Return the element of *argv* immediately following *flag*."""
        return argv[argv.index(flag) + 1]

    def test_basic_args(self):
        """Sub-command, file and name flags are emitted; ``-m`` is always there."""
        argv = _build_ingest_args("/tmp/test.xlsx", "My Project")
        assert argv[0] == "ingest-cora"
        assert "-f" in argv
        assert self._value_after(argv, "-f") == "/tmp/test.xlsx"
        assert "-n" in argv
        assert self._value_after(argv, "-n") == "My Project"
        assert "-m" in argv  # always present

    def test_with_money_site_url(self):
        """An explicit money site URL is passed through after ``-m``."""
        argv = _build_ingest_args(
            "/tmp/test.xlsx", "Proj", money_site_url="https://example.com"
        )
        assert self._value_after(argv, "-m") == "https://example.com"

    def test_placeholder_url_when_empty(self):
        """With no money site URL a placeholder URL follows ``-m``."""
        argv = _build_ingest_args("/tmp/test.xlsx", "Proj")
        assert self._value_after(argv, "-m") == "https://placeholder.example.com"

    def test_custom_branded_plus_ratio(self):
        """A non-default ratio is emitted as a string after ``-bp``."""
        argv = _build_ingest_args("/tmp/test.xlsx", "Proj", branded_plus_ratio=0.5)
        assert "-bp" in argv
        assert self._value_after(argv, "-bp") == "0.5"

    def test_default_ratio_omitted(self):
        """A ratio of 0.7 produces no ``-bp`` flag."""
        argv = _build_ingest_args("/tmp/test.xlsx", "Proj", branded_plus_ratio=0.7)
        assert "-bp" not in argv

    def test_custom_anchors(self):
        """Custom anchors are forwarded verbatim after ``-a``."""
        argv = _build_ingest_args(
            "/tmp/test.xlsx", "Proj", custom_anchors="anchor1,anchor2"
        )
        assert "-a" in argv
        assert self._value_after(argv, "-a") == "anchor1,anchor2"

    def test_extra_cli_flags(self):
        """Each token of the free-form ``cli_flags`` string appears in the args."""
        argv = _build_ingest_args("/tmp/test.xlsx", "Proj", cli_flags="-r 5 -t 0.3")
        for token in ("-r", "5", "-t", "0.3"):
            assert token in argv

    def test_all_params(self):
        """Supplying every option yields every corresponding flag."""
        argv = _build_ingest_args(
            "/tmp/test.xlsx",
            "Full Project",
            money_site_url="https://site.com",
            branded_plus_ratio=0.6,
            custom_anchors="a,b",
            cli_flags="-r 3",
        )
        for flag in ("-f", "-n", "-m", "-bp", "-a", "-r"):
            assert flag in argv
# ---------------------------------------------------------------------------
# Fuzzy matching tests
# ---------------------------------------------------------------------------
class TestFuzzyKeywordMatch:
    """Checks for the boolean result of ``_fuzzy_keyword_match``."""

    def test_exact_match(self):
        """Identical keywords match."""
        matched = _fuzzy_keyword_match("precision cnc", "precision cnc")
        assert matched is True

    def test_substring_match_a_in_b(self):
        """The first keyword contained in the second counts as a match."""
        matched = _fuzzy_keyword_match(
            "cnc machining", "precision cnc machining services"
        )
        assert matched is True

    def test_substring_match_b_in_a(self):
        """The second keyword contained in the first counts as a match."""
        matched = _fuzzy_keyword_match(
            "precision cnc machining services", "cnc machining"
        )
        assert matched is True

    def test_word_overlap(self):
        """The same words in a different order still match."""
        matched = _fuzzy_keyword_match(
            "precision cnc machining", "cnc machining precision"
        )
        assert matched is True

    def test_no_match(self):
        """Keywords with no words in common do not match."""
        matched = _fuzzy_keyword_match("precision cnc", "web design agency")
        assert matched is False

    def test_empty_strings(self):
        """An empty string on either side (or both) never matches."""
        for left, right in (("", "test"), ("test", ""), ("", "")):
            assert _fuzzy_keyword_match(left, right) is False
class TestNormalizeForMatch:
    """Checks for the string normalisation done by ``_normalize_for_match``."""

    def test_lowercase_and_strip(self):
        """Surrounding whitespace is removed and the text is lower-cased."""
        assert _normalize_for_match(" CNC Machining ") == "cnc machining"

    def test_removes_special_chars(self):
        """Punctuation is dropped, leaving single spaces between the words."""
        assert _normalize_for_match("O'Brien-&-Sons") == "o brien sons"

    def test_collapses_spaces(self):
        """Runs of spaces between words collapse to a single space."""
        # The input must contain repeated spaces; with single spaces the
        # assertion only proves identity and never exercises collapsing.
        assert _normalize_for_match("cnc   machining   services") == "cnc machining services"
# ---------------------------------------------------------------------------
# run_link_building orchestrator tests
# ---------------------------------------------------------------------------
class TestRunLinkBuilding:
    """Checks for the ``run_link_building`` orchestrator's routing and guards."""

    def test_requires_xlsx_for_cora(self, mock_ctx):
        """The Cora method without an xlsx path is skipped and says why."""
        message = run_link_building(lb_method="Cora Backlinks", ctx=mock_ctx)
        for fragment in ("Skipped", "xlsx_path"):
            assert fragment in message

    def test_default_method_is_cora(self, mock_ctx):
        """With no method given the call behaves like the Cora method."""
        message = run_link_building(ctx=mock_ctx)
        assert "Skipped" in message  # No xlsx_path

    def test_unknown_method(self, mock_ctx):
        """An unrecognised method name is reported as unknown."""
        message = run_link_building(lb_method="MCP Link Building", ctx=mock_ctx)
        assert "Unknown LB Method" in message

    @patch("cheddahbot.tools.linkbuilding.run_cora_backlinks")
    def test_routes_to_cora(self, mock_cora, mock_ctx, tmp_path):
        """The Cora method with an existing xlsx calls ``run_cora_backlinks`` once."""
        mock_cora.return_value = "Success"
        report = tmp_path / "test.xlsx"
        report.write_text("fake")

        call_kwargs = {
            "lb_method": "Cora Backlinks",
            "xlsx_path": str(report),
            "project_name": "Test",
            "ctx": mock_ctx,
        }
        run_link_building(**call_kwargs)

        mock_cora.assert_called_once()
# ---------------------------------------------------------------------------
# run_cora_backlinks pipeline tests
# ---------------------------------------------------------------------------
class TestRunCoraBacklinks:
    """Checks for the two-step ``run_cora_backlinks`` pipeline.

    ``_run_blm_command`` is patched wherever a subprocess would run, so the
    tests only drive the pipeline's handling of its results.
    """

    @staticmethod
    def _fake_xlsx(folder):
        """Create a placeholder ``test.xlsx`` in *folder* and return its path string."""
        report = folder / "test.xlsx"
        report.write_text("fake data")
        return str(report)

    @staticmethod
    def _completed(returncode, stdout, stderr=""):
        """Build the ``CompletedProcess`` a patched BLM command returns."""
        return subprocess.CompletedProcess(
            args=[], returncode=returncode, stdout=stdout, stderr=stderr
        )

    def test_missing_xlsx_path(self, mock_ctx):
        """An empty xlsx path is rejected with an error message."""
        outcome = run_cora_backlinks(xlsx_path="", project_name="Test", ctx=mock_ctx)
        assert "Error" in outcome

    def test_missing_project_name(self, mock_ctx):
        """An empty project name is rejected with an error message."""
        outcome = run_cora_backlinks(
            xlsx_path="/fake.xlsx", project_name="", ctx=mock_ctx
        )
        assert "Error" in outcome

    def test_xlsx_not_found(self, mock_ctx):
        """A path that does not exist is reported as not found."""
        outcome = run_cora_backlinks(
            xlsx_path="/nonexistent/file.xlsx", project_name="Test", ctx=mock_ctx
        )
        assert "not found" in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_happy_path(
        self, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout, generate_success_stdout
    ):
        """Both steps succeeding produces both step headings and the project id."""
        xlsx_path = self._fake_xlsx(tmp_path)
        # Results are consumed in call order: ingest-cora, then generate-batch.
        mock_cmd.side_effect = [
            self._completed(0, ingest_success_stdout),
            self._completed(0, generate_success_stdout),
        ]

        outcome = run_cora_backlinks(
            xlsx_path=xlsx_path, project_name="Test Project", ctx=mock_ctx
        )

        for fragment in (
            "Step 1: Ingest CORA Report",
            "Step 2: Generate Content Batch",
            "ID: 42",
        ):
            assert fragment in outcome
        assert mock_cmd.call_count == 2

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_ingest_failure(self, mock_cmd, mock_ctx, tmp_path):
        """A non-zero exit from the first command is reported as an ingest failure."""
        xlsx_path = self._fake_xlsx(tmp_path)
        mock_cmd.return_value = self._completed(
            1, "Error: parsing failed", stderr="traceback"
        )

        outcome = run_cora_backlinks(
            xlsx_path=xlsx_path, project_name="Test", ctx=mock_ctx
        )

        assert "Error" in outcome
        assert "ingest-cora failed" in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_generate_failure(self, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout):
        """A failing second command still shows step 1 plus the generate failure."""
        xlsx_path = self._fake_xlsx(tmp_path)
        mock_cmd.side_effect = [
            self._completed(0, ingest_success_stdout),
            self._completed(1, "Error: generation failed", stderr="traceback"),
        ]

        outcome = run_cora_backlinks(
            xlsx_path=xlsx_path, project_name="Test", ctx=mock_ctx
        )

        assert "Step 1: Ingest CORA Report" in outcome  # Step 1 succeeded
        assert "generate-batch failed" in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_ingest_timeout(self, mock_cmd, mock_ctx, tmp_path):
        """A ``TimeoutExpired`` from the command surfaces as a timed-out message."""
        xlsx_path = self._fake_xlsx(tmp_path)
        mock_cmd.side_effect = subprocess.TimeoutExpired(cmd="test", timeout=1800)

        outcome = run_cora_backlinks(
            xlsx_path=xlsx_path, project_name="Test", ctx=mock_ctx
        )

        assert "timed out" in outcome
# ---------------------------------------------------------------------------
# blm_ingest_cora standalone tests
# ---------------------------------------------------------------------------
class TestBlmIngestCora:
    """Standalone checks for the ``blm_ingest_cora`` tool."""

    @staticmethod
    def _fake_xlsx(folder):
        """Create a placeholder ``test.xlsx`` in *folder* and return its path string."""
        report = folder / "test.xlsx"
        report.write_text("fake")
        return str(report)

    def test_missing_xlsx_path(self, mock_ctx):
        """An empty xlsx path is rejected with an error message."""
        outcome = blm_ingest_cora(xlsx_path="", project_name="Test", ctx=mock_ctx)
        assert "Error" in outcome

    def test_missing_project_name(self, mock_ctx):
        """An empty project name is rejected with an error message."""
        outcome = blm_ingest_cora(
            xlsx_path="/fake.xlsx", project_name="", ctx=mock_ctx
        )
        assert "Error" in outcome

    def test_file_not_found(self, mock_ctx):
        """A path that does not exist is reported as not found."""
        outcome = blm_ingest_cora(
            xlsx_path="/nonexistent.xlsx", project_name="Test", ctx=mock_ctx
        )
        assert "not found" in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_success(self, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout):
        """A zero exit code reports completion with the project id and job file."""
        xlsx_path = self._fake_xlsx(tmp_path)
        mock_cmd.return_value = subprocess.CompletedProcess(
            args=[], returncode=0, stdout=ingest_success_stdout, stderr=""
        )

        outcome = blm_ingest_cora(
            xlsx_path=xlsx_path, project_name="Test Project", ctx=mock_ctx
        )

        for fragment in ("CORA ingest complete", "ID: 42", "jobs/test-project.json"):
            assert fragment in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_failure(self, mock_cmd, mock_ctx, tmp_path):
        """A non-zero exit code is reported as an ingest failure."""
        xlsx_path = self._fake_xlsx(tmp_path)
        mock_cmd.return_value = subprocess.CompletedProcess(
            args=[], returncode=1, stdout="Error: bad file", stderr=""
        )

        outcome = blm_ingest_cora(
            xlsx_path=xlsx_path, project_name="Test", ctx=mock_ctx
        )

        assert "Error" in outcome
        assert "ingest-cora failed" in outcome
# ---------------------------------------------------------------------------
# blm_generate_batch standalone tests
# ---------------------------------------------------------------------------
class TestBlmGenerateBatch:
    """Standalone checks for the ``blm_generate_batch`` tool."""

    @staticmethod
    def _prepare_job(ctx, folder):
        """Write an empty JSON job file into *folder* and point ``blm_dir`` at it.

        Returns the job file path as a string.
        """
        job_path = folder / "test.json"
        job_path.write_text("{}")
        ctx["config"].link_building.blm_dir = str(folder)
        return str(job_path)

    def test_missing_job_file(self, mock_ctx):
        """An empty job file argument is rejected with an error message."""
        outcome = blm_generate_batch(job_file="", ctx=mock_ctx)
        assert "Error" in outcome

    def test_file_not_found(self, mock_ctx):
        """A job file that does not exist is reported as not found."""
        outcome = blm_generate_batch(job_file="/nonexistent.json", ctx=mock_ctx)
        assert "not found" in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_success(self, mock_cmd, mock_ctx, tmp_path, generate_success_stdout):
        """A zero exit code reports completion and the moved job file path."""
        job_file = self._prepare_job(mock_ctx, tmp_path)
        mock_cmd.return_value = subprocess.CompletedProcess(
            args=[], returncode=0, stdout=generate_success_stdout, stderr=""
        )

        outcome = blm_generate_batch(job_file=job_file, ctx=mock_ctx)

        assert "Content generation complete" in outcome
        assert "jobs/done/test-project.json" in outcome

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_continue_on_error_flag(self, mock_cmd, mock_ctx, tmp_path):
        """``continue_on_error=True`` adds ``--continue-on-error`` to the command."""
        job_file = self._prepare_job(mock_ctx, tmp_path)
        mock_cmd.return_value = subprocess.CompletedProcess(
            args=[], returncode=0, stdout="Job file moved to: done/test.json\n", stderr=""
        )

        blm_generate_batch(job_file=job_file, continue_on_error=True, ctx=mock_ctx)

        # First positional argument of the patched call is the CLI arg list.
        positional_args = mock_cmd.call_args[0]
        assert "--continue-on-error" in positional_args[0]

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    def test_debug_flag(self, mock_cmd, mock_ctx, tmp_path):
        """``debug=True`` adds ``--debug`` to the command."""
        job_file = self._prepare_job(mock_ctx, tmp_path)
        mock_cmd.return_value = subprocess.CompletedProcess(
            args=[], returncode=0, stdout="", stderr=""
        )

        blm_generate_batch(job_file=job_file, debug=True, ctx=mock_ctx)

        # First positional argument of the patched call is the CLI arg list.
        positional_args = mock_cmd.call_args[0]
        assert "--debug" in positional_args[0]
# ---------------------------------------------------------------------------
# scan_cora_folder tests
# ---------------------------------------------------------------------------
class TestScanCoraFolder:
    """Checks for the report text produced by ``scan_cora_folder``."""

    @staticmethod
    def _watch(ctx, folder):
        """Set the configured watch folder on *ctx* to *folder*."""
        ctx["config"].link_building.watch_folder = str(folder)

    def test_no_context(self):
        """Calling without a context yields an error message."""
        report = scan_cora_folder(ctx=None)
        assert "Error" in report

    def test_watch_folder_not_configured(self, mock_ctx):
        """An empty watch folder setting is reported as not configured."""
        self._watch(mock_ctx, "")
        report = scan_cora_folder(ctx=mock_ctx)
        assert "not configured" in report

    def test_watch_folder_not_exists(self, mock_ctx):
        """A watch folder that is missing on disk is reported as such."""
        self._watch(mock_ctx, "/nonexistent/folder")
        report = scan_cora_folder(ctx=mock_ctx)
        assert "does not exist" in report

    def test_empty_folder(self, mock_ctx, tmp_path):
        """An existing but empty folder reports that no xlsx files were found."""
        self._watch(mock_ctx, tmp_path)
        report = scan_cora_folder(ctx=mock_ctx)
        assert "No .xlsx files" in report

    def test_finds_xlsx_files(self, mock_ctx, tmp_path):
        """Only ``.xlsx`` files are listed; other files are left out."""
        self._watch(mock_ctx, tmp_path)
        seeded_files = (
            ("report1.xlsx", "fake"),
            ("report2.xlsx", "fake"),
            ("readme.txt", "ignore me"),
        )
        for file_name, content in seeded_files:
            (tmp_path / file_name).write_text(content)

        report = scan_cora_folder(ctx=mock_ctx)

        assert "report1.xlsx" in report
        assert "report2.xlsx" in report
        assert "readme.txt" not in report

    def test_shows_processed_subfolder(self, mock_ctx, tmp_path):
        """Files in a ``processed`` subfolder are listed alongside new ones."""
        self._watch(mock_ctx, tmp_path)
        (tmp_path / "new.xlsx").write_text("fake")
        done_dir = tmp_path / "processed"
        done_dir.mkdir()
        (done_dir / "old.xlsx").write_text("fake")

        report = scan_cora_folder(ctx=mock_ctx)

        for fragment in ("new.xlsx", "Processed", "old.xlsx"):
            assert fragment in report

    def test_shows_kv_status(self, mock_ctx, tmp_path):
        """A status stored in the KV store for a file shows up in the report."""
        self._watch(mock_ctx, tmp_path)
        (tmp_path / "tracked.xlsx").write_text("fake")
        status_payload = json.dumps({"status": "completed"})
        mock_ctx["db"].kv_set("linkbuilding:watched:tracked.xlsx", status_payload)

        report = scan_cora_folder(ctx=mock_ctx)

        assert "completed" in report
# ---------------------------------------------------------------------------
# ClickUp state machine tests
# ---------------------------------------------------------------------------
class TestClickUpStateMachine:
    """Checks that ``run_cora_backlinks`` updates the stored ClickUp task state.

    Both the BLM command runner and the ClickUp client factory are patched;
    the task state is read back from the ``db`` KV store in the context.
    """

    @staticmethod
    def _seed_executing_task(ctx, task_id):
        """Enable ClickUp on *ctx* for *task_id* and return its state key.

        The KV entry for the task is pre-set to the ``executing`` state.
        """
        state_key = f"clickup:task:{task_id}:state"
        ctx["clickup_task_id"] = task_id
        ctx["config"].clickup.enabled = True
        ctx["db"].kv_set(state_key, json.dumps({"state": "executing"}))
        return state_key

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    @patch("cheddahbot.tools.linkbuilding._get_clickup_client")
    def test_pipeline_sets_completed_state(
        self, mock_cu, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout, generate_success_stdout
    ):
        """A fully successful pipeline moves the task state to ``completed``."""
        report = tmp_path / "test.xlsx"
        report.write_text("fake")

        # ClickUp client stub whose space lookup returns no tasks.
        mock_cu.return_value = MagicMock(**{"get_tasks_from_space.return_value": []})
        state_key = self._seed_executing_task(mock_ctx, "task_abc")

        # Results are consumed in call order: ingest-cora, then generate-batch.
        mock_cmd.side_effect = [
            subprocess.CompletedProcess(
                args=[], returncode=0, stdout=ingest_success_stdout, stderr=""
            ),
            subprocess.CompletedProcess(
                args=[], returncode=0, stdout=generate_success_stdout, stderr=""
            ),
        ]

        outcome = run_cora_backlinks(
            xlsx_path=str(report), project_name="Test", ctx=mock_ctx
        )

        assert "ClickUp Sync" in outcome
        # Verify KV state was updated
        stored = json.loads(mock_ctx["db"].kv_get(state_key))
        assert stored["state"] == "completed"

    @patch("cheddahbot.tools.linkbuilding._run_blm_command")
    @patch("cheddahbot.tools.linkbuilding._get_clickup_client")
    def test_pipeline_sets_failed_state(self, mock_cu, mock_cmd, mock_ctx, tmp_path):
        """A failing first command moves the task state to ``failed``."""
        report = tmp_path / "test.xlsx"
        report.write_text("fake")

        mock_cu.return_value = MagicMock()
        state_key = self._seed_executing_task(mock_ctx, "task_fail")
        mock_ctx["config"].clickup.skill_map = {
            "Link Building": {"error_status": "internal review"}
        }
        mock_cmd.return_value = subprocess.CompletedProcess(
            args=[], returncode=1, stdout="Error", stderr="crash"
        )

        outcome = run_cora_backlinks(
            xlsx_path=str(report), project_name="Test", ctx=mock_ctx
        )

        assert "Error" in outcome
        stored = json.loads(mock_ctx["db"].kv_get(state_key))
        assert stored["state"] == "failed"