# Big-Link-Man/tests/unit/test_tiered_links.py
#
# 331 lines
# 11 KiB
# Python

"""
Unit tests for tiered link finder
"""
import pytest
from unittest.mock import Mock, MagicMock, patch
from src.interlinking.tiered_links import (
find_tiered_links,
_validate_batch_tier,
_extract_tier_number,
_get_link_count_range
)
from src.database.models import GeneratedContent, Project
from src.generation.job_config import Job
class TestExtractTierNumber:
    """Checks how _extract_tier_number turns a tier label into an integer."""

    def test_tier1(self):
        """The label "tier1" is parsed as 1."""
        parsed = _extract_tier_number("tier1")
        assert parsed == 1

    def test_tier2(self):
        """The label "tier2" is parsed as 2."""
        parsed = _extract_tier_number("tier2")
        assert parsed == 2

    def test_tier3(self):
        """The label "tier3" is parsed as 3."""
        parsed = _extract_tier_number("tier3")
        assert parsed == 3

    def test_invalid_format(self):
        """The label "invalid" is rejected with a ValueError."""
        with pytest.raises(ValueError, match="Invalid tier format"):
            _extract_tier_number("invalid")

    def test_tier_without_number(self):
        """The bare label "tier" (no digit) is rejected with a ValueError."""
        with pytest.raises(ValueError, match="Invalid tier format"):
            _extract_tier_number("tier")
class TestValidateBatchTier:
    """Exercises _validate_batch_tier with uniform and mixed-tier batches."""

    def test_single_tier_batch(self):
        """A batch whose records are all tier1 yields the string "tier1"."""
        # Arrange: two records sharing the same tier label.
        first_record = Mock(spec=GeneratedContent)
        first_record.tier = "tier1"
        second_record = Mock(spec=GeneratedContent)
        second_record.tier = "tier1"
        batch = [first_record, second_record]

        # Act
        batch_tier = _validate_batch_tier(batch)

        # Assert
        assert batch_tier == "tier1"

    def test_mixed_tiers_raises_error(self):
        """A batch mixing tier1 and tier2 records is rejected with a ValueError."""
        tier1_record = Mock(spec=GeneratedContent)
        tier1_record.tier = "tier1"
        tier2_record = Mock(spec=GeneratedContent)
        tier2_record.tier = "tier2"
        batch = [tier1_record, tier2_record]

        with pytest.raises(ValueError, match="All articles in batch must be same tier"):
            _validate_batch_tier(batch)
class TestGetLinkCountRange:
    """Checks the min/max link range that _get_link_count_range reports."""

    def test_default_range(self):
        """Passing None instead of a job gives the 2-4 range."""
        expected = {"min": 2, "max": 4}
        assert _get_link_count_range(None) == expected

    def test_job_object_with_range(self):
        """A Job carrying its own range has that range returned."""
        configured_job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range={"min": 3, "max": 6},
        )

        link_range = _get_link_count_range(configured_job)

        assert link_range == {"min": 3, "max": 6}

    def test_job_object_without_range(self):
        """A Job whose range is None gets the 2-4 range."""
        unconfigured_job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range=None,
        )

        link_range = _get_link_count_range(unconfigured_job)

        assert link_range == {"min": 2, "max": 4}

    def test_dict_with_range(self):
        """A plain dict with a "tiered_link_count_range" key has that range returned."""
        job_as_dict = {"tiered_link_count_range": {"min": 5, "max": 8}}

        link_range = _get_link_count_range(job_as_dict)

        assert link_range == {"min": 5, "max": 8}

    def test_dict_without_range(self):
        """An empty dict gets the 2-4 range."""
        job_as_dict = {}

        link_range = _get_link_count_range(job_as_dict)

        assert link_range == {"min": 2, "max": 4}
class TestFindTieredLinks:
    """Covers find_tiered_links for tier 1 batches and for higher-tier batches.

    Repositories are plain Mock objects, and generate_urls_for_batch is patched
    inside src.interlinking.tiered_links wherever lower-tier URLs are needed.
    """

    def test_empty_content_records_raises_error(self):
        """An empty batch is rejected with a ValueError."""
        projects, contents, sites = Mock(), Mock(), Mock()

        with pytest.raises(ValueError, match="content_records cannot be empty"):
            find_tiered_links([], None, projects, contents, sites)

    def test_tier1_returns_money_site_url(self):
        """A tier1 batch reports tier 1 plus the project's money_site_url."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier1"
        article.project_id = 1

        owning_project = Mock(spec=Project)
        owning_project.money_site_url = "https://www.mymoneysite.com"

        projects = Mock()
        projects.get_by_id.return_value = owning_project
        contents = Mock()
        sites = Mock()

        outcome = find_tiered_links([article], None, projects, contents, sites)

        assert outcome["tier"] == 1
        assert outcome["money_site_url"] == "https://www.mymoneysite.com"
        projects.get_by_id.assert_called_once_with(1)

    def test_tier1_missing_money_site_url_raises_error(self):
        """A tier1 batch whose project has money_site_url None raises ValueError."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier1"
        article.project_id = 1

        owning_project = Mock(spec=Project)
        owning_project.money_site_url = None

        projects = Mock()
        projects.get_by_id.return_value = owning_project
        contents = Mock()
        sites = Mock()

        with pytest.raises(ValueError, match="money_site_url not set in project 1"):
            find_tiered_links([article], None, projects, contents, sites)

    def test_tier1_missing_project_raises_error(self):
        """A tier1 batch whose project lookup returns None raises ValueError."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier1"
        article.project_id = 999

        projects = Mock()
        projects.get_by_id.return_value = None
        contents = Mock()
        sites = Mock()

        with pytest.raises(ValueError, match="money_site_url not set in project 999"):
            find_tiered_links([article], None, projects, contents, sites)

    def test_tier2_queries_tier1_articles(self):
        """A tier2 batch looks up tier1 articles and returns their URLs."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier2"
        article.project_id = 1

        tier1_article = Mock(spec=GeneratedContent)
        tier1_article.id = 10
        tier1_article.tier = "tier1"
        tier1_article.title = "Lower Tier Article"
        tier1_article.site_deployment_id = 5

        projects = Mock()
        contents = Mock()
        contents.get_by_project_and_tier.return_value = [tier1_article]
        sites = Mock()

        generated = [{
            "content_id": 10,
            "url": "https://example.com/article.html",
            "title": "Lower Tier Article"
        }]
        with patch(
            'src.interlinking.tiered_links.generate_urls_for_batch',
            return_value=generated,
        ):
            outcome = find_tiered_links([article], None, projects, contents, sites)

        assert outcome["tier"] == 2
        assert outcome["lower_tier"] == 1
        assert len(outcome["lower_tier_urls"]) == 1
        assert outcome["lower_tier_urls"][0] == "https://example.com/article.html"
        contents.get_by_project_and_tier.assert_called_once_with(1, "tier1")

    def test_tier3_queries_tier2_articles(self):
        """A tier3 batch looks up tier2 articles of the same project."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier3"
        article.project_id = 2

        tier2_articles = [
            Mock(id=i, tier="tier2", site_deployment_id=5) for i in range(10)
        ]

        projects = Mock()
        contents = Mock()
        contents.get_by_project_and_tier.return_value = tier2_articles
        sites = Mock()

        generated = [
            {"content_id": i, "url": f"https://example.com/article-{i}.html"}
            for i in range(10)
        ]
        with patch(
            'src.interlinking.tiered_links.generate_urls_for_batch',
            return_value=generated,
        ):
            outcome = find_tiered_links([article], None, projects, contents, sites)

        assert outcome["tier"] == 3
        assert outcome["lower_tier"] == 2
        contents.get_by_project_and_tier.assert_called_once_with(2, "tier2")

    def test_no_lower_tier_articles_raises_error(self):
        """A tier2 batch with no tier1 articles available raises ValueError."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier2"
        article.project_id = 1

        projects = Mock()
        contents = Mock()
        contents.get_by_project_and_tier.return_value = []
        sites = Mock()

        with pytest.raises(ValueError, match="no tier 1 articles found in project 1"):
            find_tiered_links([article], None, projects, contents, sites)

    def test_custom_link_count_range(self):
        """With a 5-8 range on the job, between 5 and 8 URLs come back."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier2"
        article.project_id = 1

        tier1_articles = [
            Mock(id=i, tier="tier1", site_deployment_id=5) for i in range(20)
        ]

        ranged_job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range={"min": 5, "max": 8},
        )

        projects = Mock()
        contents = Mock()
        contents.get_by_project_and_tier.return_value = tier1_articles
        sites = Mock()

        def fake_generate_urls(articles, site_repo):
            # Produce one URL per article actually handed to the generator, so
            # the result size reflects how many articles were selected.
            return [
                {"content_id": i, "url": f"https://example.com/article-{i}.html"}
                for i in range(len(articles))
            ]

        with patch(
            'src.interlinking.tiered_links.generate_urls_for_batch',
            side_effect=fake_generate_urls,
        ):
            outcome = find_tiered_links([article], ranged_job, projects, contents, sites)

        selected = len(outcome["lower_tier_urls"])
        assert 5 <= selected <= 8

    def test_fewer_articles_than_min_uses_all_available(self, caplog):
        """With one tier1 article and no job, that one URL is used and a notice is logged."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier2"
        article.project_id = 1

        tier1_articles = [
            Mock(id=1, tier="tier1", site_deployment_id=5)
        ]

        projects = Mock()
        contents = Mock()
        contents.get_by_project_and_tier.return_value = tier1_articles
        sites = Mock()

        generated = [
            {"content_id": 1, "url": "https://example.com/article.html"}
        ]
        with patch(
            'src.interlinking.tiered_links.generate_urls_for_batch',
            return_value=generated,
        ):
            outcome = find_tiered_links([article], None, projects, contents, sites)

        assert len(outcome["lower_tier_urls"]) == 1
        assert "Only 1 tier 1 articles available" in caplog.text

    def test_exact_count_when_min_equals_max(self):
        """With min and max both 8, exactly 8 URLs come back."""
        article = Mock(spec=GeneratedContent)
        article.tier = "tier2"
        article.project_id = 1

        tier1_articles = [
            Mock(id=i, tier="tier1", site_deployment_id=5) for i in range(20)
        ]

        fixed_job = Job(
            project_id=1,
            tiers={},
            tiered_link_count_range={"min": 8, "max": 8},
        )

        projects = Mock()
        contents = Mock()
        contents.get_by_project_and_tier.return_value = tier1_articles
        sites = Mock()

        def fake_generate_urls(articles, site_repo):
            # Produce one URL per article actually handed to the generator, so
            # the result size reflects how many articles were selected.
            return [
                {"content_id": i, "url": f"https://example.com/article-{i}.html"}
                for i in range(len(articles))
            ]

        with patch(
            'src.interlinking.tiered_links.generate_urls_for_batch',
            side_effect=fake_generate_urls,
        ):
            outcome = find_tiered_links([article], fixed_job, projects, contents, sites)

        assert len(outcome["lower_tier_urls"]) == 8