# Big-Link-Man/tests/unit/test_job_config.py
#
# 334 lines
# 9.0 KiB
# Python

"""
Unit tests for JobConfig parser
"""
import pytest
import json
from pathlib import Path
from src.generation.job_config import JobConfig, TIER_DEFAULTS
@pytest.fixture
def temp_job_file(tmp_path):
    """Provide a factory that dumps a dict to a JSON job file.

    The returned callable writes ``data`` as JSON to ``test_job.json`` inside
    the ``tmp_path`` directory and returns that file's path as a string.
    Every call targets the same file name, so a later call overwrites an
    earlier one.
    """
    target = tmp_path / "test_job.json"

    def _create_file(data):
        with target.open('w') as handle:
            json.dump(data, handle)
        return str(target)

    return _create_file
def test_load_job_config_valid(temp_job_file):
    """A well-formed file with one job yields one job carrying its id and tier."""
    single_job = {"project_id": 1, "tiers": {"tier1": {"count": 5}}}
    path = temp_job_file({"jobs": [single_job]})

    loaded = JobConfig(path)

    assert len(loaded.get_jobs()) == 1
    assert loaded.get_jobs()[0].project_id == 1
    assert "tier1" in loaded.get_jobs()[0].tiers
def test_tier_defaults_applied(temp_job_file):
    """Word-count limits come from TIER_DEFAULTS when the tier only sets count."""
    payload = {"jobs": [{"project_id": 1, "tiers": {"tier1": {"count": 3}}}]}
    loaded = JobConfig(temp_job_file(payload))

    tier1 = loaded.get_jobs()[0].tiers["tier1"]
    fallback = TIER_DEFAULTS["tier1"]

    # The explicitly supplied value is kept as-is ...
    assert tier1.count == 3
    # ... while the omitted limits match the module-level defaults.
    assert tier1.min_word_count == fallback["min_word_count"]
    assert tier1.max_word_count == fallback["max_word_count"]
def test_custom_values_override_defaults(temp_job_file):
    """Word-count limits given in the job file are the ones exposed on the tier."""
    tier1_settings = {
        "count": 5,
        "min_word_count": 3000,
        "max_word_count": 3500,
    }
    payload = {"jobs": [{"project_id": 1, "tiers": {"tier1": tier1_settings}}]}

    loaded = JobConfig(temp_job_file(payload))
    tier1 = loaded.get_jobs()[0].tiers["tier1"]

    assert tier1.min_word_count == 3000
    assert tier1.max_word_count == 3500
def test_multiple_jobs_in_file(temp_job_file):
    """Two entries under 'jobs' are returned as two jobs in file order."""
    first_entry = {"project_id": 1, "tiers": {"tier1": {"count": 5}}}
    second_entry = {"project_id": 2, "tiers": {"tier2": {"count": 10}}}
    path = temp_job_file({"jobs": [first_entry, second_entry]})

    parsed = JobConfig(path).get_jobs()

    assert len(parsed) == 2
    for position, expected_id in enumerate((1, 2)):
        assert parsed[position].project_id == expected_id
def test_multiple_tiers_in_job(temp_job_file):
    """A job that declares three tiers exposes all three by name."""
    tier_counts = (("tier1", 5), ("tier2", 10), ("tier3", 15))
    tiers = {name: {"count": count} for name, count in tier_counts}
    path = temp_job_file({"jobs": [{"project_id": 1, "tiers": tiers}]})

    job = JobConfig(path).get_jobs()[0]

    assert len(job.tiers) == 3
    for name, _ in tier_counts:
        assert name in job.tiers
def test_invalid_job_file_no_jobs_key(temp_job_file):
    """A file with neither 'jobs' nor 'project_id' is rejected with ValueError."""
    path = temp_job_file({"invalid": []})
    expected_message = "must contain either 'jobs' array or 'project_id' field"

    with pytest.raises(ValueError, match=expected_message):
        JobConfig(path)
def test_invalid_job_missing_project_id(temp_job_file):
    """A job entry without 'project_id' is rejected with ValueError."""
    job_without_id = {"tiers": {"tier1": {"count": 5}}}
    path = temp_job_file({"jobs": [job_without_id]})

    with pytest.raises(ValueError, match="missing 'project_id'"):
        JobConfig(path)
def test_file_not_found(tmp_path):
    """Loading a path that does not exist raises FileNotFoundError.

    The missing path is placed inside pytest's per-test ``tmp_path`` directory
    rather than given relative to the current working directory, so the test
    cannot be affected by a stray ``nonexistent_file.json`` wherever the suite
    happens to be run from.
    """
    missing_path = tmp_path / "nonexistent_file.json"

    # JobConfig is handed a plain string path, matching how the other tests
    # in this module pass paths to it.
    with pytest.raises(FileNotFoundError):
        JobConfig(str(missing_path))
def test_explicit_anchor_text_job_level(temp_job_file):
    """Job-level explicit anchor text keeps its mode and per-tier term lists."""
    anchor_section = {
        "mode": "explicit",
        "tier1": ["high volume", "precision machining"],
        "tier2": ["bulk manufacturing", "large scale"],
    }
    job_entry = {
        "project_id": 26,
        "anchor_text_config": anchor_section,
        "tiers": {"tier1": {"count": 12}, "tier2": {"count": 38}},
    }
    path = temp_job_file({"jobs": [job_entry]})

    job = JobConfig(path).get_jobs()[0]
    anchor = job.anchor_text_config

    assert anchor is not None
    assert anchor.mode == "explicit"
    assert anchor.tier1 == ["high volume", "precision machining"]
    assert anchor.tier2 == ["bulk manufacturing", "large scale"]
def test_explicit_anchor_text_tier_level(temp_job_file):
    """Tier-level explicit anchor text keeps its mode and 'terms' list."""
    tier1_entry = {
        "count": 12,
        "anchor_text_config": {
            "mode": "explicit",
            "terms": ["high volume", "precision"],
        },
    }
    job_entry = {
        "project_id": 26,
        "tiers": {"tier1": tier1_entry, "tier2": {"count": 38}},
    }
    path = temp_job_file({"jobs": [job_entry]})

    job = JobConfig(path).get_jobs()[0]
    tier_anchor = job.tiers["tier1"].anchor_text_config

    assert tier_anchor is not None
    assert tier_anchor.mode == "explicit"
    assert tier_anchor.terms == ["high volume", "precision"]
def test_explicit_anchor_text_tier_override_job(temp_job_file):
    """Tier-level and job-level anchor configs each keep their own terms.

    Both levels are supplied for tier1; the tier config must report the
    tier-level terms while the job config still reports the job-level ones.
    """
    job_level_anchor = {
        "mode": "explicit",
        "tier1": ["job level term"],
        "tier2": ["bulk manufacturing"],
    }
    tier1_entry = {
        "count": 12,
        "anchor_text_config": {
            "mode": "explicit",
            "terms": ["tier level term"],
        },
    }
    job_entry = {
        "project_id": 26,
        "anchor_text_config": job_level_anchor,
        "tiers": {"tier1": tier1_entry, "tier2": {"count": 38}},
    }
    path = temp_job_file({"jobs": [job_entry]})

    job = JobConfig(path).get_jobs()[0]
    tier1 = job.tiers["tier1"]

    assert tier1.anchor_text_config.terms == ["tier level term"]
    assert job.anchor_text_config.tier1 == ["job level term"]
def test_explicit_mode_requires_terms_job_level(temp_job_file):
    """Job-level explicit mode with no per-tier term list raises ValueError."""
    job_entry = {
        "project_id": 26,
        "anchor_text_config": {"mode": "explicit"},
        "tiers": {"tier1": {"count": 12}},
    }
    path = temp_job_file({"jobs": [job_entry]})
    expected_message = "must have at least one tier-specific term list"

    with pytest.raises(ValueError, match=expected_message):
        JobConfig(path)
def test_explicit_mode_requires_terms_tier_level(temp_job_file):
    """Tier-level explicit mode without a 'terms' list raises ValueError."""
    tier1_entry = {
        "count": 12,
        "anchor_text_config": {"mode": "explicit"},
    }
    job_entry = {"project_id": 26, "tiers": {"tier1": tier1_entry}}
    path = temp_job_file({"jobs": [job_entry]})

    with pytest.raises(ValueError, match="must have 'terms' array"):
        JobConfig(path)
def test_explicit_anchor_text_all_tiers(temp_job_file):
    """Job-level explicit terms for tier1 through tier4_plus are all exposed."""
    terms_by_field = {
        "tier1": ["tier1 term"],
        "tier2": ["tier2 term"],
        "tier3": ["tier3 term"],
        "tier4_plus": ["tier4 term"],
    }
    job_entry = {
        "project_id": 26,
        "anchor_text_config": {"mode": "explicit", **terms_by_field},
        "tiers": {"tier1": {"count": 12}, "tier2": {"count": 38}},
    }
    path = temp_job_file({"jobs": [job_entry]})

    job = JobConfig(path).get_jobs()[0]

    # Each term list is read back from the attribute named after its key.
    for field_name, expected_terms in terms_by_field.items():
        assert getattr(job.anchor_text_config, field_name) == expected_terms