334 lines
9.0 KiB
Python
334 lines
9.0 KiB
Python
"""
|
|
Unit tests for JobConfig parser
|
|
"""
|
|
|
|
import pytest
|
|
import json
|
|
from pathlib import Path
|
|
from src.generation.job_config import JobConfig, TIER_DEFAULTS
|
|
|
|
|
|
@pytest.fixture
def temp_job_file(tmp_path):
    """Return a factory that dumps a dict to a JSON job file.

    The factory writes its argument to ``test_job.json`` inside the
    ``tmp_path`` directory and returns that file's path as a string.
    """
    target = tmp_path / "test_job.json"

    def _write_job(payload):
        with target.open('w') as handle:
            json.dump(payload, handle)
        return str(target)

    return _write_job
|
|
|
|
|
|
def test_load_job_config_valid(temp_job_file):
    """A well-formed single-job file loads with its project id and tier."""
    tier_spec = {"tier1": {"count": 5}}
    payload = {"jobs": [{"project_id": 1, "tiers": tier_spec}]}

    parsed = JobConfig(temp_job_file(payload))

    assert len(parsed.get_jobs()) == 1
    assert parsed.get_jobs()[0].project_id == 1
    assert "tier1" in parsed.get_jobs()[0].tiers
|
|
|
|
|
|
def test_tier_defaults_applied(temp_job_file):
    """Word-count limits left out of the file match TIER_DEFAULTS."""
    payload = {
        "jobs": [
            {"project_id": 1, "tiers": {"tier1": {"count": 3}}},
        ]
    }

    parsed = JobConfig(temp_job_file(payload))
    tier1 = parsed.get_jobs()[0].tiers["tier1"]

    # The explicitly supplied value is kept...
    assert tier1.count == 3

    # ...while the omitted limits equal the module-level defaults.
    expected = TIER_DEFAULTS["tier1"]
    assert tier1.min_word_count == expected["min_word_count"]
    assert tier1.max_word_count == expected["max_word_count"]
|
|
|
|
|
|
def test_custom_values_override_defaults(temp_job_file):
    """Word-count limits given in the job file are reported as written."""
    overrides = {
        "count": 5,
        "min_word_count": 3000,
        "max_word_count": 3500,
    }
    payload = {"jobs": [{"project_id": 1, "tiers": {"tier1": overrides}}]}

    parsed = JobConfig(temp_job_file(payload))
    tier1 = parsed.get_jobs()[0].tiers["tier1"]

    assert tier1.min_word_count == 3000
    assert tier1.max_word_count == 3500
|
|
|
|
|
|
def test_multiple_jobs_in_file(temp_job_file):
    """Two entries in the ``jobs`` array yield two jobs, in file order."""
    first_entry = {"project_id": 1, "tiers": {"tier1": {"count": 5}}}
    second_entry = {"project_id": 2, "tiers": {"tier2": {"count": 10}}}
    payload = {"jobs": [first_entry, second_entry]}

    parsed = JobConfig(temp_job_file(payload))
    loaded = parsed.get_jobs()

    assert len(loaded) == 2
    assert loaded[0].project_id == 1
    assert loaded[1].project_id == 2
|
|
|
|
|
|
def test_multiple_tiers_in_job(temp_job_file):
    """A job declaring three tiers exposes all three by name."""
    tier_specs = {
        "tier1": {"count": 5},
        "tier2": {"count": 10},
        "tier3": {"count": 15},
    }
    payload = {"jobs": [{"project_id": 1, "tiers": tier_specs}]}

    parsed = JobConfig(temp_job_file(payload))
    only_job = parsed.get_jobs()[0]

    assert len(only_job.tiers) == 3
    for tier_name in ("tier1", "tier2", "tier3"):
        assert tier_name in only_job.tiers
|
|
|
|
|
|
def test_invalid_job_file_no_jobs_key(temp_job_file):
    """A file holding only an unrelated top-level key raises ValueError."""
    bad_path = temp_job_file({"invalid": []})
    expected_message = "must contain either 'jobs' array or 'project_id' field"

    with pytest.raises(ValueError, match=expected_message):
        JobConfig(bad_path)
|
|
|
|
|
|
def test_invalid_job_missing_project_id(temp_job_file):
    """A job entry without ``project_id`` raises ValueError."""
    entry_without_id = {"tiers": {"tier1": {"count": 5}}}
    bad_path = temp_job_file({"jobs": [entry_without_id]})

    with pytest.raises(ValueError, match="missing 'project_id'"):
        JobConfig(bad_path)
|
|
|
|
|
|
def test_file_not_found():
    """Constructing JobConfig from a missing path raises FileNotFoundError."""
    missing_path = "nonexistent_file.json"

    with pytest.raises(FileNotFoundError):
        JobConfig(missing_path)
|
|
|
|
|
|
def test_explicit_anchor_text_job_level(temp_job_file):
    """Job-level explicit anchor config exposes its mode and per-tier terms."""
    anchor_section = {
        "mode": "explicit",
        "tier1": ["high volume", "precision machining"],
        "tier2": ["bulk manufacturing", "large scale"],
    }
    job_entry = {
        "project_id": 26,
        "anchor_text_config": anchor_section,
        "tiers": {"tier1": {"count": 12}, "tier2": {"count": 38}},
    }

    parsed = JobConfig(temp_job_file({"jobs": [job_entry]}))
    anchors = parsed.get_jobs()[0].anchor_text_config

    assert anchors is not None
    assert anchors.mode == "explicit"
    assert anchors.tier1 == ["high volume", "precision machining"]
    assert anchors.tier2 == ["bulk manufacturing", "large scale"]
|
|
|
|
|
|
def test_explicit_anchor_text_tier_level(temp_job_file):
    """Tier-level explicit anchor config exposes its mode and terms."""
    tier1_spec = {
        "count": 12,
        "anchor_text_config": {
            "mode": "explicit",
            "terms": ["high volume", "precision"],
        },
    }
    job_entry = {
        "project_id": 26,
        "tiers": {"tier1": tier1_spec, "tier2": {"count": 38}},
    }

    parsed = JobConfig(temp_job_file({"jobs": [job_entry]}))
    tier1_anchors = parsed.get_jobs()[0].tiers["tier1"].anchor_text_config

    assert tier1_anchors is not None
    assert tier1_anchors.mode == "explicit"
    assert tier1_anchors.terms == ["high volume", "precision"]
|
|
|
|
|
|
def test_explicit_anchor_text_tier_override_job(temp_job_file):
    """Tier-level and job-level explicit terms are each kept on their own config."""
    job_anchor_section = {
        "mode": "explicit",
        "tier1": ["job level term"],
        "tier2": ["bulk manufacturing"],
    }
    tier1_spec = {
        "count": 12,
        "anchor_text_config": {
            "mode": "explicit",
            "terms": ["tier level term"],
        },
    }
    job_entry = {
        "project_id": 26,
        "anchor_text_config": job_anchor_section,
        "tiers": {"tier1": tier1_spec, "tier2": {"count": 38}},
    }

    parsed = JobConfig(temp_job_file({"jobs": [job_entry]}))
    loaded_job = parsed.get_jobs()[0]
    tier1 = loaded_job.tiers["tier1"]

    # The tier carries its own terms; the job-level list is left as given.
    assert tier1.anchor_text_config.terms == ["tier level term"]
    assert loaded_job.anchor_text_config.tier1 == ["job level term"]
|
|
|
|
|
|
def test_explicit_mode_requires_terms_job_level(temp_job_file):
    """Job-level explicit mode with no tier term lists raises ValueError."""
    job_entry = {
        "project_id": 26,
        "anchor_text_config": {"mode": "explicit"},
        "tiers": {"tier1": {"count": 12}},
    }
    bad_path = temp_job_file({"jobs": [job_entry]})
    expected_message = "must have at least one tier-specific term list"

    with pytest.raises(ValueError, match=expected_message):
        JobConfig(bad_path)
|
|
|
|
|
|
def test_explicit_mode_requires_terms_tier_level(temp_job_file):
    """Tier-level explicit mode without ``terms`` raises ValueError."""
    tier1_spec = {
        "count": 12,
        "anchor_text_config": {"mode": "explicit"},
    }
    job_entry = {"project_id": 26, "tiers": {"tier1": tier1_spec}}
    bad_path = temp_job_file({"jobs": [job_entry]})

    with pytest.raises(ValueError, match="must have 'terms' array"):
        JobConfig(bad_path)
|
|
|
|
|
|
def test_explicit_anchor_text_all_tiers(temp_job_file):
    """Job-level explicit terms for tier1 through tier4_plus are all exposed."""
    anchor_section = {
        "mode": "explicit",
        "tier1": ["tier1 term"],
        "tier2": ["tier2 term"],
        "tier3": ["tier3 term"],
        "tier4_plus": ["tier4 term"],
    }
    job_entry = {
        "project_id": 26,
        "anchor_text_config": anchor_section,
        "tiers": {"tier1": {"count": 12}, "tier2": {"count": 38}},
    }

    parsed = JobConfig(temp_job_file({"jobs": [job_entry]}))
    anchors = parsed.get_jobs()[0].anchor_text_config

    assert anchors.tier1 == ["tier1 term"]
    assert anchors.tier2 == ["tier2 term"]
    assert anchors.tier3 == ["tier3 term"]
    assert anchors.tier4_plus == ["tier4 term"]