331 lines
11 KiB
Python
331 lines
11 KiB
Python
"""
|
|
Unit tests for tiered link finder
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import Mock, MagicMock, patch
|
|
from src.interlinking.tiered_links import (
|
|
find_tiered_links,
|
|
_validate_batch_tier,
|
|
_extract_tier_number,
|
|
_get_link_count_range
|
|
)
|
|
from src.database.models import GeneratedContent, Project
|
|
from src.generation.job_config import Job
|
|
|
|
|
|
class TestExtractTierNumber:
    """Unit tests covering the ``_extract_tier_number`` helper."""

    def test_tier1(self):
        """The label ``"tier1"`` must map to the integer 1."""
        tier_number = _extract_tier_number("tier1")
        assert tier_number == 1

    def test_tier2(self):
        """The label ``"tier2"`` must map to the integer 2."""
        tier_number = _extract_tier_number("tier2")
        assert tier_number == 2

    def test_tier3(self):
        """The label ``"tier3"`` must map to the integer 3."""
        tier_number = _extract_tier_number("tier3")
        assert tier_number == 3

    def test_invalid_format(self):
        """The string ``"invalid"`` must be rejected with a ``ValueError``."""
        rejects_format = pytest.raises(ValueError, match="Invalid tier format")
        with rejects_format:
            _extract_tier_number("invalid")

    def test_tier_without_number(self):
        """A bare ``"tier"`` (no digit) must be rejected with a ``ValueError``."""
        rejects_format = pytest.raises(ValueError, match="Invalid tier format")
        with rejects_format:
            _extract_tier_number("tier")
|
|
|
|
|
|
class TestValidateBatchTier:
    """Unit tests covering the ``_validate_batch_tier`` helper."""

    @staticmethod
    def _content_with_tier(tier):
        """Return a ``GeneratedContent``-spec'd mock whose ``tier`` is *tier*."""
        record = Mock(spec=GeneratedContent)
        record.tier = tier
        return record

    def test_single_tier_batch(self):
        """Two records that are both ``"tier1"`` validate to ``"tier1"``."""
        batch = [self._content_with_tier("tier1") for _ in range(2)]

        assert _validate_batch_tier(batch) == "tier1"

    def test_mixed_tiers_raises_error(self):
        """A batch mixing ``"tier1"`` and ``"tier2"`` raises ``ValueError``."""
        batch = [self._content_with_tier(tier) for tier in ("tier1", "tier2")]

        with pytest.raises(ValueError, match="All articles in batch must be same tier"):
            _validate_batch_tier(batch)
|
|
|
|
|
|
class TestGetLinkCountRange:
    """Unit tests covering the ``_get_link_count_range`` helper."""

    def test_default_range(self):
        """Passing ``None`` yields ``{"min": 2, "max": 4}``."""
        assert _get_link_count_range(None) == {"min": 2, "max": 4}

    def test_job_object_with_range(self):
        """A ``Job`` carrying an explicit range has that range returned."""
        configured_job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range={"min": 3, "max": 6},
        )

        assert _get_link_count_range(configured_job) == {"min": 3, "max": 6}

    def test_job_object_without_range(self):
        """A ``Job`` whose range is ``None`` yields ``{"min": 2, "max": 4}``."""
        unconfigured_job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range=None,
        )

        assert _get_link_count_range(unconfigured_job) == {"min": 2, "max": 4}

    def test_dict_with_range(self):
        """A plain dict with a ``tiered_link_count_range`` key is honoured."""
        settings = {"tiered_link_count_range": {"min": 5, "max": 8}}

        assert _get_link_count_range(settings) == {"min": 5, "max": 8}

    def test_dict_without_range(self):
        """An empty dict yields ``{"min": 2, "max": 4}``."""
        settings = {}

        assert _get_link_count_range(settings) == {"min": 2, "max": 4}
|
|
|
|
|
|
class TestFindTieredLinks:
    """Unit tests covering the ``find_tiered_links`` entry point."""

    # Patch target: ``generate_urls_for_batch`` as referenced from inside
    # ``src.interlinking.tiered_links`` (the module under test).
    _URL_GENERATOR = 'src.interlinking.tiered_links.generate_urls_for_batch'

    @staticmethod
    def _make_content(tier, project_id):
        """Return a ``GeneratedContent``-spec'd mock with tier and project set."""
        record = Mock(spec=GeneratedContent)
        record.tier = tier
        record.project_id = project_id
        return record

    @staticmethod
    def _lower_tier_mocks(tier, count):
        """Return *count* plain mocks with ids ``0..count-1`` on deployment 5."""
        return [Mock(id=i, tier=tier, site_deployment_id=5) for i in range(count)]

    @staticmethod
    def _one_url_per_article(articles, site_repo):
        """Fake URL generator: one entry per article it was handed.

        Used as a ``side_effect`` so the number of URLs returned tracks how
        many articles the code under test passed in.
        """
        return [
            {"content_id": position, "url": f"https://example.com/article-{position}.html"}
            for position, _ in enumerate(articles)
        ]

    def test_empty_content_records_raises_error(self):
        """An empty ``content_records`` list raises ``ValueError``."""
        repos = (Mock(), Mock(), Mock())  # project, content, site

        with pytest.raises(ValueError, match="content_records cannot be empty"):
            find_tiered_links([], None, *repos)

    def test_tier1_returns_money_site_url(self):
        """Tier 1 content resolves to the project's ``money_site_url``."""
        content = self._make_content("tier1", 1)

        project = Mock(spec=Project)
        project.money_site_url = "https://www.mymoneysite.com"

        project_repo = Mock()
        project_repo.get_by_id.return_value = project

        result = find_tiered_links([content], None, project_repo, Mock(), Mock())

        assert result["tier"] == 1
        assert result["money_site_url"] == "https://www.mymoneysite.com"
        project_repo.get_by_id.assert_called_once_with(1)

    def test_tier1_missing_money_site_url_raises_error(self):
        """A project whose ``money_site_url`` is ``None`` raises ``ValueError``."""
        content = self._make_content("tier1", 1)

        project = Mock(spec=Project)
        project.money_site_url = None

        project_repo = Mock()
        project_repo.get_by_id.return_value = project

        with pytest.raises(ValueError, match="money_site_url not set in project 1"):
            find_tiered_links([content], None, project_repo, Mock(), Mock())

    def test_tier1_missing_project_raises_error(self):
        """A project lookup returning ``None`` raises ``ValueError``."""
        content = self._make_content("tier1", 999)

        project_repo = Mock()
        project_repo.get_by_id.return_value = None

        with pytest.raises(ValueError, match="money_site_url not set in project 999"):
            find_tiered_links([content], None, project_repo, Mock(), Mock())

    def test_tier2_queries_tier1_articles(self):
        """Tier 2 content looks up tier 1 articles and returns their URLs."""
        content = self._make_content("tier2", 1)

        tier1_article = Mock(
            spec=GeneratedContent,
            id=10,
            tier="tier1",
            title="Lower Tier Article",
            site_deployment_id=5,
        )

        content_repo = Mock()
        content_repo.get_by_project_and_tier.return_value = [tier1_article]

        generated = [{
            "content_id": 10,
            "url": "https://example.com/article.html",
            "title": "Lower Tier Article"
        }]
        with patch(self._URL_GENERATOR, return_value=generated):
            result = find_tiered_links([content], None, Mock(), content_repo, Mock())

        urls = result["lower_tier_urls"]
        assert result["tier"] == 2
        assert result["lower_tier"] == 1
        assert len(urls) == 1
        assert urls[0] == "https://example.com/article.html"
        content_repo.get_by_project_and_tier.assert_called_once_with(1, "tier1")

    def test_tier3_queries_tier2_articles(self):
        """Tier 3 content looks up tier 2 articles of the same project."""
        content = self._make_content("tier3", 2)

        content_repo = Mock()
        content_repo.get_by_project_and_tier.return_value = self._lower_tier_mocks("tier2", 10)

        generated = [
            {"content_id": i, "url": f"https://example.com/article-{i}.html"}
            for i in range(10)
        ]
        with patch(self._URL_GENERATOR, return_value=generated):
            result = find_tiered_links([content], None, Mock(), content_repo, Mock())

        assert result["tier"] == 3
        assert result["lower_tier"] == 2
        content_repo.get_by_project_and_tier.assert_called_once_with(2, "tier2")

    def test_no_lower_tier_articles_raises_error(self):
        """An empty lower-tier lookup raises ``ValueError``."""
        content = self._make_content("tier2", 1)

        content_repo = Mock()
        content_repo.get_by_project_and_tier.return_value = []

        with pytest.raises(ValueError, match="no tier 1 articles found in project 1"):
            find_tiered_links([content], None, Mock(), content_repo, Mock())

    def test_custom_link_count_range(self):
        """With a 5-8 job range and 20 candidates, 5 to 8 URLs come back."""
        content = self._make_content("tier2", 1)

        job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range={"min": 5, "max": 8},
        )

        content_repo = Mock()
        content_repo.get_by_project_and_tier.return_value = self._lower_tier_mocks("tier1", 20)

        # The fake generator answers with as many URLs as articles it receives.
        with patch(self._URL_GENERATOR, side_effect=self._one_url_per_article):
            result = find_tiered_links([content], job, Mock(), content_repo, Mock())

        url_count = len(result["lower_tier_urls"])
        assert 5 <= url_count <= 8

    def test_fewer_articles_than_min_uses_all_available(self, caplog):
        """One available article yields one URL and a log message saying so."""
        content = self._make_content("tier2", 1)

        only_article = Mock(id=1, tier="tier1", site_deployment_id=5)

        content_repo = Mock()
        content_repo.get_by_project_and_tier.return_value = [only_article]

        generated = [
            {"content_id": 1, "url": "https://example.com/article.html"}
        ]
        with patch(self._URL_GENERATOR, return_value=generated):
            result = find_tiered_links([content], None, Mock(), content_repo, Mock())

        assert len(result["lower_tier_urls"]) == 1
        assert "Only 1 tier 1 articles available" in caplog.text

    def test_exact_count_when_min_equals_max(self):
        """With an 8-8 job range and 20 candidates, exactly 8 URLs come back."""
        content = self._make_content("tier2", 1)

        job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range={"min": 8, "max": 8},
        )

        content_repo = Mock()
        content_repo.get_by_project_and_tier.return_value = self._lower_tier_mocks("tier1", 20)

        # The fake generator answers with as many URLs as articles it receives.
        with patch(self._URL_GENERATOR, side_effect=self._one_url_per_article):
            result = find_tiered_links([content], job, Mock(), content_repo, Mock())

        assert len(result["lower_tier_urls"]) == 8
|
|
|