""" Unit tests for JobConfig parser """ import pytest import json from pathlib import Path from src.generation.job_config import JobConfig, TIER_DEFAULTS @pytest.fixture def temp_job_file(tmp_path): """Create a temporary job file for testing""" def _create_file(data): job_file = tmp_path / "test_job.json" with open(job_file, 'w') as f: json.dump(data, f) return str(job_file) return _create_file def test_load_job_config_valid(temp_job_file): """Test loading valid job file""" data = { "jobs": [ { "project_id": 1, "tiers": { "tier1": { "count": 5 } } } ] } job_file = temp_job_file(data) config = JobConfig(job_file) assert len(config.get_jobs()) == 1 assert config.get_jobs()[0].project_id == 1 assert "tier1" in config.get_jobs()[0].tiers def test_tier_defaults_applied(temp_job_file): """Test defaults applied when not in job file""" data = { "jobs": [ { "project_id": 1, "tiers": { "tier1": { "count": 3 } } } ] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] tier1_config = job.tiers["tier1"] assert tier1_config.count == 3 assert tier1_config.min_word_count == TIER_DEFAULTS["tier1"]["min_word_count"] assert tier1_config.max_word_count == TIER_DEFAULTS["tier1"]["max_word_count"] def test_custom_values_override_defaults(temp_job_file): """Test custom values override defaults""" data = { "jobs": [ { "project_id": 1, "tiers": { "tier1": { "count": 5, "min_word_count": 3000, "max_word_count": 3500 } } } ] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] tier1_config = job.tiers["tier1"] assert tier1_config.min_word_count == 3000 assert tier1_config.max_word_count == 3500 def test_multiple_jobs_in_file(temp_job_file): """Test parsing file with multiple jobs""" data = { "jobs": [ { "project_id": 1, "tiers": {"tier1": {"count": 5}} }, { "project_id": 2, "tiers": {"tier2": {"count": 10}} } ] } job_file = temp_job_file(data) config = JobConfig(job_file) jobs = config.get_jobs() assert len(jobs) == 2 assert jobs[0].project_id == 1 assert jobs[1].project_id == 2 def test_multiple_tiers_in_job(temp_job_file): """Test job with multiple tiers""" data = { "jobs": [ { "project_id": 1, "tiers": { "tier1": {"count": 5}, "tier2": {"count": 10}, "tier3": {"count": 15} } } ] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] assert len(job.tiers) == 3 assert "tier1" in job.tiers assert "tier2" in job.tiers assert "tier3" in job.tiers def test_invalid_job_file_no_jobs_key(temp_job_file): """Test error when jobs key is missing""" data = {"invalid": []} job_file = temp_job_file(data) with pytest.raises(ValueError, match="must contain either 'jobs' array or 'project_id' field"): JobConfig(job_file) def test_invalid_job_missing_project_id(temp_job_file): """Test error when project_id is missing""" data = { "jobs": [ { "tiers": {"tier1": {"count": 5}} } ] } job_file = temp_job_file(data) with pytest.raises(ValueError, match="missing 'project_id'"): JobConfig(job_file) def test_file_not_found(): """Test error when file doesn't exist""" with pytest.raises(FileNotFoundError): JobConfig("nonexistent_file.json") def test_explicit_anchor_text_job_level(temp_job_file): """Test explicit anchor text configuration at job level""" data = { "jobs": [{ "project_id": 26, "anchor_text_config": { "mode": "explicit", "tier1": ["high volume", "precision machining"], "tier2": ["bulk manufacturing", "large scale"] }, "tiers": { "tier1": {"count": 12}, "tier2": {"count": 38} } }] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] assert job.anchor_text_config is not None assert job.anchor_text_config.mode == "explicit" assert job.anchor_text_config.tier1 == ["high volume", "precision machining"] assert job.anchor_text_config.tier2 == ["bulk manufacturing", "large scale"] def test_explicit_anchor_text_tier_level(temp_job_file): """Test explicit anchor text configuration at tier level""" data = { "jobs": [{ "project_id": 26, "tiers": { "tier1": { "count": 12, "anchor_text_config": { "mode": "explicit", "terms": ["high volume", "precision"] } }, "tier2": {"count": 38} } }] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] tier1_config = job.tiers["tier1"] assert tier1_config.anchor_text_config is not None assert tier1_config.anchor_text_config.mode == "explicit" assert tier1_config.anchor_text_config.terms == ["high volume", "precision"] def test_explicit_anchor_text_tier_override_job(temp_job_file): """Test tier-level explicit config overrides job-level""" data = { "jobs": [{ "project_id": 26, "anchor_text_config": { "mode": "explicit", "tier1": ["job level term"], "tier2": ["bulk manufacturing"] }, "tiers": { "tier1": { "count": 12, "anchor_text_config": { "mode": "explicit", "terms": ["tier level term"] } }, "tier2": {"count": 38} } }] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] tier1_config = job.tiers["tier1"] assert tier1_config.anchor_text_config.terms == ["tier level term"] assert job.anchor_text_config.tier1 == ["job level term"] def test_explicit_mode_requires_terms_job_level(temp_job_file): """Test that explicit mode requires tier-specific terms at job level""" data = { "jobs": [{ "project_id": 26, "anchor_text_config": { "mode": "explicit" }, "tiers": { "tier1": {"count": 12} } }] } job_file = temp_job_file(data) with pytest.raises(ValueError, match="must have at least one tier-specific term list"): JobConfig(job_file) def test_explicit_mode_requires_terms_tier_level(temp_job_file): """Test that explicit mode requires terms at tier level""" data = { "jobs": [{ "project_id": 26, "tiers": { "tier1": { "count": 12, "anchor_text_config": { "mode": "explicit" } } } }] } job_file = temp_job_file(data) with pytest.raises(ValueError, match="must have 'terms' array"): JobConfig(job_file) def test_explicit_anchor_text_all_tiers(temp_job_file): """Test explicit anchor text for all tier levels""" data = { "jobs": [{ "project_id": 26, "anchor_text_config": { "mode": "explicit", "tier1": ["tier1 term"], "tier2": ["tier2 term"], "tier3": ["tier3 term"], "tier4_plus": ["tier4 term"] }, "tiers": { "tier1": {"count": 12}, "tier2": {"count": 38} } }] } job_file = temp_job_file(data) config = JobConfig(job_file) job = config.get_jobs()[0] assert job.anchor_text_config.tier1 == ["tier1 term"] assert job.anchor_text_config.tier2 == ["tier2 term"] assert job.anchor_text_config.tier3 == ["tier3 term"] assert job.anchor_text_config.tier4_plus == ["tier4 term"]