"""Tests for the link building pipeline tools.""" from __future__ import annotations import json import subprocess from unittest.mock import MagicMock, patch import pytest from cheddahbot.tools.linkbuilding import ( _build_ingest_args, _fuzzy_keyword_match, _normalize_for_match, _parse_generate_output, _parse_ingest_output, blm_generate_batch, blm_ingest_cora, run_cora_backlinks, run_link_building, scan_cora_folder, ) # --------------------------------------------------------------------------- # Fixtures # --------------------------------------------------------------------------- @pytest.fixture() def mock_config(): """Minimal config object for tool context.""" config = MagicMock() config.link_building.blm_dir = "/fake/blm" config.link_building.watch_folder = "" config.link_building.watch_interval_minutes = 60 config.link_building.default_branded_plus_ratio = 0.7 config.clickup.enabled = False config.clickup.api_token = "" config.clickup.workspace_id = "" config.clickup.space_id = "" config.clickup.in_progress_status = "in progress" config.clickup.task_type_field_name = "Work Category" config.clickup.skill_map = {} return config @pytest.fixture() def mock_ctx(tmp_db, mock_config): """Provide a tool context dict with db and config.""" return { "config": mock_config, "db": tmp_db, } @pytest.fixture() def ingest_success_stdout(): """Stdout from a successful ingest-cora run.""" return ( "Authenticated as: testuser (User)\n" "\n" "Parsing CORA file: /tmp/test.xlsx\n" "Main Keyword: precision cnc machining\n" "Word Count: 1500\n" "Entities Found: 12\n" "Related Searches: 8\n" "\n" "Creating project: Test Project\n" "Money Site URL: https://example.com\n" "\n" "Success: Project 'Test Project' created (ID: 42)\n" "Main Keyword: precision cnc machining\n" "Money Site URL: https://example.com\n" "Entities: 12\n" "Related Searches: 8\n" "Job file created: jobs/test-project.json\n" ) @pytest.fixture() def generate_success_stdout(): """Stdout from a successful generate-batch run.""" return ( "Authenticated as: testuser (User)\n" "Initializing AI client with default model: gpt-4o-mini\n" "\n" "Processing job file: jobs/test-project.json\n" "Concurrent workers: 3\n" "\n" "Job file moved to: jobs/done/test-project.json\n" ) # --------------------------------------------------------------------------- # Output parser tests # --------------------------------------------------------------------------- class TestParseIngestOutput: def test_parses_success(self, ingest_success_stdout): result = _parse_ingest_output(ingest_success_stdout) assert result["project_id"] == "42" assert result["project_name"] == "Test Project" assert result["job_file"] == "jobs/test-project.json" assert result["main_keyword"] == "precision cnc machining" def test_empty_stdout(self): result = _parse_ingest_output("") assert result["project_id"] == "" assert result["job_file"] == "" assert result["project_name"] == "" assert result["main_keyword"] == "" def test_partial_output_no_job_file(self): stdout = "Success: Project 'My Project' created (ID: 99)\n" result = _parse_ingest_output(stdout) assert result["project_id"] == "99" assert result["project_name"] == "My Project" assert result["job_file"] == "" def test_error_output(self): stdout = "Error: Authentication failed\n" result = _parse_ingest_output(stdout) assert result["project_id"] == "" assert result["job_file"] == "" def test_project_with_special_chars(self): stdout = ( "Success: Project 'O'Brien & Sons (LLC)'" " created (ID: 7)\n" "Job file created: jobs/obrien.json\n" ) result = _parse_ingest_output(stdout) # Regex won't match greedy quote - that's ok, just verify no crash assert result["job_file"] == "jobs/obrien.json" def test_job_file_with_date_suffix(self): stdout = "Job file created: jobs/my-project-260219.json\n" result = _parse_ingest_output(stdout) assert result["job_file"] == "jobs/my-project-260219.json" class TestParseGenerateOutput: def test_parses_success(self, generate_success_stdout): result = _parse_generate_output(generate_success_stdout) assert result["success"] is True assert result["job_moved_to"] == "jobs/done/test-project.json" def test_empty_stdout(self): result = _parse_generate_output("") assert result["success"] is False assert result["job_moved_to"] == "" def test_no_job_moved_line(self): stdout = "Authenticated as: testuser (User)\nProcessing...\n" result = _parse_generate_output(stdout) assert result["success"] is False assert result["raw_output"] == stdout # --------------------------------------------------------------------------- # CLI arg builder tests # --------------------------------------------------------------------------- class TestBuildIngestArgs: def test_basic_args(self): args = _build_ingest_args("/tmp/test.xlsx", "My Project") assert args[0] == "ingest-cora" assert "-f" in args assert args[args.index("-f") + 1] == "/tmp/test.xlsx" assert "-n" in args assert args[args.index("-n") + 1] == "My Project" assert "-m" in args # always present def test_with_money_site_url(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", money_site_url="https://example.com") assert args[args.index("-m") + 1] == "https://example.com" def test_placeholder_url_when_empty(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj") assert args[args.index("-m") + 1] == "https://placeholder.example.com" def test_custom_branded_plus_ratio(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", branded_plus_ratio=0.5) assert "-bp" in args assert args[args.index("-bp") + 1] == "0.5" def test_default_ratio_omitted(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", branded_plus_ratio=0.7) assert "-bp" not in args def test_custom_anchors(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", custom_anchors="anchor1,anchor2") assert "-a" in args assert args[args.index("-a") + 1] == "anchor1,anchor2" def test_extra_cli_flags(self): args = _build_ingest_args("/tmp/test.xlsx", "Proj", cli_flags="-r 5 -t 0.3") assert "-r" in args assert "5" in args assert "-t" in args assert "0.3" in args def test_all_params(self): args = _build_ingest_args( "/tmp/test.xlsx", "Full Project", money_site_url="https://site.com", branded_plus_ratio=0.6, custom_anchors="a,b", cli_flags="-r 3", ) assert "-f" in args assert "-n" in args assert "-m" in args assert "-bp" in args assert "-a" in args assert "-r" in args # --------------------------------------------------------------------------- # Fuzzy matching tests # --------------------------------------------------------------------------- class TestFuzzyKeywordMatch: def test_exact_match(self): assert _fuzzy_keyword_match("precision cnc", "precision cnc") is True def test_substring_match_a_in_b(self): assert _fuzzy_keyword_match("cnc machining", "precision cnc machining services") is True def test_substring_match_b_in_a(self): assert _fuzzy_keyword_match("precision cnc machining services", "cnc machining") is True def test_word_overlap(self): assert _fuzzy_keyword_match("precision cnc machining", "cnc machining precision") is True def test_no_match(self): assert _fuzzy_keyword_match("precision cnc", "web design agency") is False def test_empty_strings(self): assert _fuzzy_keyword_match("", "test") is False assert _fuzzy_keyword_match("test", "") is False assert _fuzzy_keyword_match("", "") is False class TestNormalizeForMatch: def test_lowercase_and_strip(self): assert _normalize_for_match(" CNC Machining ") == "cnc machining" def test_removes_special_chars(self): assert _normalize_for_match("O'Brien-&-Sons") == "o brien sons" def test_collapses_spaces(self): assert _normalize_for_match("cnc machining services") == "cnc machining services" # --------------------------------------------------------------------------- # run_link_building orchestrator tests # --------------------------------------------------------------------------- class TestRunLinkBuilding: def test_requires_xlsx_for_cora(self, mock_ctx): result = run_link_building(lb_method="Cora Backlinks", ctx=mock_ctx) assert "Skipped" in result assert "xlsx_path" in result def test_default_method_is_cora(self, mock_ctx): result = run_link_building(ctx=mock_ctx) assert "Skipped" in result # No xlsx_path def test_unknown_method(self, mock_ctx): result = run_link_building(lb_method="MCP Link Building", ctx=mock_ctx) assert "Unknown LB Method" in result @patch("cheddahbot.tools.linkbuilding.run_cora_backlinks") def test_routes_to_cora(self, mock_cora, mock_ctx, tmp_path): mock_cora.return_value = "Success" xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake") run_link_building( lb_method="Cora Backlinks", xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx, ) mock_cora.assert_called_once() # --------------------------------------------------------------------------- # run_cora_backlinks pipeline tests # --------------------------------------------------------------------------- class TestRunCoraBacklinks: def test_missing_xlsx_path(self, mock_ctx): result = run_cora_backlinks(xlsx_path="", project_name="Test", ctx=mock_ctx) assert "Error" in result def test_missing_project_name(self, mock_ctx): result = run_cora_backlinks(xlsx_path="/fake.xlsx", project_name="", ctx=mock_ctx) assert "Error" in result def test_xlsx_not_found(self, mock_ctx): result = run_cora_backlinks( xlsx_path="/nonexistent/file.xlsx", project_name="Test", ctx=mock_ctx ) assert "not found" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_happy_path( self, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout, generate_success_stdout ): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake data") # First call: ingest-cora ingest_proc = subprocess.CompletedProcess( args=[], returncode=0, stdout=ingest_success_stdout, stderr="" ) # Second call: generate-batch gen_proc = subprocess.CompletedProcess( args=[], returncode=0, stdout=generate_success_stdout, stderr="" ) mock_cmd.side_effect = [ingest_proc, gen_proc] result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test Project", ctx=mock_ctx) assert "Step 1: Ingest CORA Report" in result assert "Step 2: Generate Content Batch" in result assert "ID: 42" in result assert mock_cmd.call_count == 2 @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_ingest_failure(self, mock_cmd, mock_ctx, tmp_path): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake data") mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=1, stdout="Error: parsing failed", stderr="traceback" ) result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) assert "Error" in result assert "ingest-cora failed" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_generate_failure(self, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake data") ingest_proc = subprocess.CompletedProcess( args=[], returncode=0, stdout=ingest_success_stdout, stderr="" ) gen_proc = subprocess.CompletedProcess( args=[], returncode=1, stdout="Error: generation failed", stderr="traceback" ) mock_cmd.side_effect = [ingest_proc, gen_proc] result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) assert "Step 1: Ingest CORA Report" in result # Step 1 succeeded assert "generate-batch failed" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_ingest_timeout(self, mock_cmd, mock_ctx, tmp_path): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake data") mock_cmd.side_effect = subprocess.TimeoutExpired(cmd="test", timeout=1800) result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) assert "timed out" in result # --------------------------------------------------------------------------- # blm_ingest_cora standalone tests # --------------------------------------------------------------------------- class TestBlmIngestCora: def test_missing_xlsx_path(self, mock_ctx): result = blm_ingest_cora(xlsx_path="", project_name="Test", ctx=mock_ctx) assert "Error" in result def test_missing_project_name(self, mock_ctx): result = blm_ingest_cora(xlsx_path="/fake.xlsx", project_name="", ctx=mock_ctx) assert "Error" in result def test_file_not_found(self, mock_ctx): result = blm_ingest_cora(xlsx_path="/nonexistent.xlsx", project_name="Test", ctx=mock_ctx) assert "not found" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_success(self, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake") mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=0, stdout=ingest_success_stdout, stderr="" ) result = blm_ingest_cora(xlsx_path=str(xlsx), project_name="Test Project", ctx=mock_ctx) assert "CORA ingest complete" in result assert "ID: 42" in result assert "jobs/test-project.json" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_failure(self, mock_cmd, mock_ctx, tmp_path): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake") mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=1, stdout="Error: bad file", stderr="" ) result = blm_ingest_cora(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) assert "Error" in result assert "ingest-cora failed" in result # --------------------------------------------------------------------------- # blm_generate_batch standalone tests # --------------------------------------------------------------------------- class TestBlmGenerateBatch: def test_missing_job_file(self, mock_ctx): result = blm_generate_batch(job_file="", ctx=mock_ctx) assert "Error" in result def test_file_not_found(self, mock_ctx): result = blm_generate_batch(job_file="/nonexistent.json", ctx=mock_ctx) assert "not found" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_success(self, mock_cmd, mock_ctx, tmp_path, generate_success_stdout): job = tmp_path / "test.json" job.write_text("{}") mock_ctx["config"].link_building.blm_dir = str(tmp_path) mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=0, stdout=generate_success_stdout, stderr="" ) result = blm_generate_batch(job_file=str(job), ctx=mock_ctx) assert "Content generation complete" in result assert "jobs/done/test-project.json" in result @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_continue_on_error_flag(self, mock_cmd, mock_ctx, tmp_path): job = tmp_path / "test.json" job.write_text("{}") mock_ctx["config"].link_building.blm_dir = str(tmp_path) mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=0, stdout="Job file moved to: done/test.json\n", stderr="" ) blm_generate_batch(job_file=str(job), continue_on_error=True, ctx=mock_ctx) call_args = mock_cmd.call_args[0][0] assert "--continue-on-error" in call_args @patch("cheddahbot.tools.linkbuilding._run_blm_command") def test_debug_flag(self, mock_cmd, mock_ctx, tmp_path): job = tmp_path / "test.json" job.write_text("{}") mock_ctx["config"].link_building.blm_dir = str(tmp_path) mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=0, stdout="", stderr="" ) blm_generate_batch(job_file=str(job), debug=True, ctx=mock_ctx) call_args = mock_cmd.call_args[0][0] assert "--debug" in call_args # --------------------------------------------------------------------------- # scan_cora_folder tests # --------------------------------------------------------------------------- class TestScanCoraFolder: def test_no_context(self): result = scan_cora_folder(ctx=None) assert "Error" in result def test_watch_folder_not_configured(self, mock_ctx): mock_ctx["config"].link_building.watch_folder = "" result = scan_cora_folder(ctx=mock_ctx) assert "not configured" in result def test_watch_folder_not_exists(self, mock_ctx): mock_ctx["config"].link_building.watch_folder = "/nonexistent/folder" result = scan_cora_folder(ctx=mock_ctx) assert "does not exist" in result def test_empty_folder(self, mock_ctx, tmp_path): mock_ctx["config"].link_building.watch_folder = str(tmp_path) result = scan_cora_folder(ctx=mock_ctx) assert "No .xlsx files" in result def test_finds_xlsx_files(self, mock_ctx, tmp_path): mock_ctx["config"].link_building.watch_folder = str(tmp_path) (tmp_path / "report1.xlsx").write_text("fake") (tmp_path / "report2.xlsx").write_text("fake") (tmp_path / "readme.txt").write_text("ignore me") result = scan_cora_folder(ctx=mock_ctx) assert "report1.xlsx" in result assert "report2.xlsx" in result assert "readme.txt" not in result def test_shows_processed_subfolder(self, mock_ctx, tmp_path): mock_ctx["config"].link_building.watch_folder = str(tmp_path) (tmp_path / "new.xlsx").write_text("fake") processed = tmp_path / "processed" processed.mkdir() (processed / "old.xlsx").write_text("fake") result = scan_cora_folder(ctx=mock_ctx) assert "new.xlsx" in result assert "Processed" in result assert "old.xlsx" in result def test_shows_kv_status(self, mock_ctx, tmp_path): mock_ctx["config"].link_building.watch_folder = str(tmp_path) (tmp_path / "tracked.xlsx").write_text("fake") db = mock_ctx["db"] db.kv_set("linkbuilding:watched:tracked.xlsx", json.dumps({"status": "completed"})) result = scan_cora_folder(ctx=mock_ctx) assert "completed" in result # --------------------------------------------------------------------------- # ClickUp state machine tests # --------------------------------------------------------------------------- class TestClickUpStateMachine: @patch("cheddahbot.tools.linkbuilding._run_blm_command") @patch("cheddahbot.tools.linkbuilding._get_clickup_client") def test_pipeline_sets_completed_state( self, mock_cu, mock_cmd, mock_ctx, tmp_path, ingest_success_stdout, generate_success_stdout ): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake") # Mock ClickUp client cu = MagicMock() cu.get_tasks_from_space.return_value = [] mock_cu.return_value = cu # Inject a clickup_task_id via ctx mock_ctx["clickup_task_id"] = "task_abc" mock_ctx["config"].clickup.enabled = True # Pre-set executing state mock_ctx["db"].kv_set( "clickup:task:task_abc:state", json.dumps({"state": "executing"}), ) ingest_proc = subprocess.CompletedProcess( args=[], returncode=0, stdout=ingest_success_stdout, stderr="" ) gen_proc = subprocess.CompletedProcess( args=[], returncode=0, stdout=generate_success_stdout, stderr="" ) mock_cmd.side_effect = [ingest_proc, gen_proc] result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) assert "ClickUp Sync" in result # Verify KV state was updated raw = mock_ctx["db"].kv_get("clickup:task:task_abc:state") state = json.loads(raw) assert state["state"] == "completed" @patch("cheddahbot.tools.linkbuilding._run_blm_command") @patch("cheddahbot.tools.linkbuilding._get_clickup_client") def test_pipeline_sets_failed_state(self, mock_cu, mock_cmd, mock_ctx, tmp_path): xlsx = tmp_path / "test.xlsx" xlsx.write_text("fake") cu = MagicMock() mock_cu.return_value = cu mock_ctx["clickup_task_id"] = "task_fail" mock_ctx["config"].clickup.enabled = True mock_ctx["config"].clickup.skill_map = { "Link Building": {"error_status": "internal review"} } mock_ctx["db"].kv_set( "clickup:task:task_fail:state", json.dumps({"state": "executing"}), ) mock_cmd.return_value = subprocess.CompletedProcess( args=[], returncode=1, stdout="Error", stderr="crash" ) result = run_cora_backlinks(xlsx_path=str(xlsx), project_name="Test", ctx=mock_ctx) assert "Error" in result raw = mock_ctx["db"].kv_get("clickup:task:task_fail:state") state = json.loads(raw) assert state["state"] == "failed"