# Source: Big-Link-Man/tests/integration/test_story_3_2_integration.py
#
# Repository viewer metadata: 523 lines, 17 KiB, Python

"""
Integration tests for Story 3.2: Find Tiered Links
"""
import pytest
from src.database.models import GeneratedContent, SiteDeployment, Project, ArticleLink
from src.database.repositories import (
SiteDeploymentRepository,
GeneratedContentRepository,
ProjectRepository,
ArticleLinkRepository
)
from src.generation.job_config import Job
from src.interlinking.tiered_links import find_tiered_links
class TestTieredLinksFindingIntegration:
    """Integration tests for tiered link finding with real database.

    Every test creates its own project / site / article rows through the
    repositories and then calls ``find_tiered_links`` with a one-article
    batch. Shared row setup lives in the private helpers below; their
    names do not start with ``test`` so pytest does not collect them.
    """

    @staticmethod
    def _make_repos(db_session):
        """Return ``(project_repo, content_repo, site_repo)`` for *db_session*."""
        return (
            ProjectRepository(db_session),
            GeneratedContentRepository(db_session),
            SiteDeploymentRepository(db_session),
        )

    @staticmethod
    def _create_site(site_repo, hostname="test.b-cdn.net"):
        """Create the standard test site; only the pull-zone hostname varies."""
        return site_repo.create(
            site_name="test-site",
            storage_zone_id=1,
            storage_zone_name="test",
            storage_zone_password="pass",
            storage_zone_region="DE",
            pull_zone_id=10,
            pull_zone_bcdn_hostname=hostname,
        )

    @staticmethod
    def _create_article(content_repo, project_id, tier, title, *,
                        keyword="test", site=None):
        """Create a generated article with the boilerplate fields filled in.

        ``site_deployment_id`` is only passed to the repository when *site*
        is given, so articles without a site are created exactly as if the
        argument had been omitted.
        """
        fields = {
            "project_id": project_id,
            "tier": tier,
            "keyword": keyword,
            "title": title,
            "outline": {},
            "content": "<p>Test</p>",
            "word_count": 100,
            "status": "generated",
        }
        if site is not None:
            fields["site_deployment_id"] = site.id
        return content_repo.create(**fields)

    def test_tier1_returns_money_site_url(self, db_session):
        """Test tier 1 batch returns money site URL from project"""
        project_repo, content_repo, site_repo = self._make_repos(db_session)

        # Create project with money site URL
        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={
                "main_keyword": "test keyword",
                "money_site_url": "https://www.mymoneysite.com",
            },
        )

        # Create tier 1 content
        content = self._create_article(
            content_repo, project.id, "tier1", "Tier 1 Article"
        )

        result = find_tiered_links(
            [content], None, project_repo, content_repo, site_repo
        )

        assert result["tier"] == 1
        assert result["money_site_url"] == "https://www.mymoneysite.com"

    def test_tier2_queries_tier1_articles_same_project(self, db_session):
        """Test tier 2 batch queries tier 1 articles from same project only"""
        project_repo, content_repo, site_repo = self._make_repos(db_session)

        # Create site for tier 1 articles
        site = self._create_site(site_repo)

        # Create two projects
        project1 = project_repo.create(
            user_id=1,
            name="Project 1",
            data={"main_keyword": "test1"},
        )
        project2 = project_repo.create(
            user_id=1,
            name="Project 2",
            data={"main_keyword": "test2"},
        )

        # Create tier 1 articles for project 1
        for i in range(5):
            self._create_article(
                content_repo,
                project1.id,
                "tier1",
                f"Project 1 Tier 1 Article {i}",
                keyword="test1",
                site=site,
            )

        # Create tier 1 articles for project 2 (should not be selected)
        for i in range(3):
            self._create_article(
                content_repo,
                project2.id,
                "tier1",
                f"Project 2 Tier 1 Article {i}",
                keyword="test2",
                site=site,
            )

        # Create tier 2 article for project 1
        tier2_article = self._create_article(
            content_repo,
            project1.id,
            "tier2",
            "Project 1 Tier 2 Article",
            keyword="test1",
        )

        result = find_tiered_links(
            [tier2_article], None, project_repo, content_repo, site_repo
        )

        assert result["tier"] == 2
        assert result["lower_tier"] == 1
        assert len(result["lower_tier_urls"]) >= 2
        assert len(result["lower_tier_urls"]) <= 4

        # Verify URLs are from tier 1 project 1 articles only
        for url in result["lower_tier_urls"]:
            assert "test.b-cdn.net" in url
            assert any(
                f"project-1-tier-1-article-{i}" in url.lower() for i in range(5)
            )

    def test_tier3_queries_tier2_articles(self, db_session):
        """Test tier 3 batch queries tier 2 articles"""
        project_repo, content_repo, site_repo = self._make_repos(db_session)

        site = self._create_site(site_repo, hostname="tier2site.b-cdn.net")
        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create tier 2 articles
        for i in range(10):
            self._create_article(
                content_repo, project.id, "tier2", f"Tier 2 Article {i}", site=site
            )

        # Create tier 3 article
        tier3_article = self._create_article(
            content_repo, project.id, "tier3", "Tier 3 Article"
        )

        result = find_tiered_links(
            [tier3_article], None, project_repo, content_repo, site_repo
        )

        assert result["tier"] == 3
        assert result["lower_tier"] == 2
        assert len(result["lower_tier_urls"]) >= 2
        assert len(result["lower_tier_urls"]) <= 4

    def test_custom_link_count_range(self, db_session):
        """Test custom link count range from job config"""
        project_repo, content_repo, site_repo = self._make_repos(db_session)

        site = self._create_site(site_repo)
        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create 15 tier 1 articles
        for i in range(15):
            self._create_article(
                content_repo, project.id, "tier1", f"Tier 1 Article {i}", site=site
            )

        # Create tier 2 article
        tier2_article = self._create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        # Test with custom range (min=5, max=8)
        job = Job(
            project_id=project.id,
            tiers={},
            tiered_link_count_range={"min": 5, "max": 8},
        )

        result = find_tiered_links(
            [tier2_article], job, project_repo, content_repo, site_repo
        )

        url_count = len(result["lower_tier_urls"])
        assert 5 <= url_count <= 8

    def test_exact_count_when_min_equals_max(self, db_session):
        """Test exact link count when min equals max"""
        project_repo, content_repo, site_repo = self._make_repos(db_session)

        site = self._create_site(site_repo)
        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create 20 tier 1 articles
        for i in range(20):
            self._create_article(
                content_repo, project.id, "tier1", f"Tier 1 Article {i}", site=site
            )

        tier2_article = self._create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        # Test with exact count (min=7, max=7)
        job = Job(
            project_id=project.id,
            tiers={},
            tiered_link_count_range={"min": 7, "max": 7},
        )

        result = find_tiered_links(
            [tier2_article], job, project_repo, content_repo, site_repo
        )

        assert len(result["lower_tier_urls"]) == 7

    def test_insufficient_lower_tier_articles_uses_all(self, db_session):
        """Test that all available articles are used when fewer than min requested"""
        project_repo, content_repo, site_repo = self._make_repos(db_session)

        site = self._create_site(site_repo)
        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create only 1 tier 1 article
        self._create_article(
            content_repo, project.id, "tier1", "Only Tier 1 Article", site=site
        )

        tier2_article = self._create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        result = find_tiered_links(
            [tier2_article], None, project_repo, content_repo, site_repo
        )

        # Should return the 1 available article even though min is 2
        assert len(result["lower_tier_urls"]) == 1
class TestArticleLinkRepositoryIntegration:
    """Integration tests for ArticleLink repository with database constraints.

    Each test creates a project and a few articles, stores links between
    them (or to an external URL) and reads them back through the
    repository's query methods.
    """

    @staticmethod
    def _create_article(content_repo, project_id, tier, title):
        """Create a generated article with the boilerplate fields filled in."""
        return content_repo.create(
            project_id=project_id,
            tier=tier,
            keyword="test",
            title=title,
            outline={},
            content="<p>Test</p>",
            word_count=100,
            status="generated",
        )

    def test_create_and_query_tiered_links(self, db_session):
        """Test creating and querying tiered links"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        link_repo = ArticleLinkRepository(db_session)

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create tier 1 and tier 2 articles
        tier1_article = self._create_article(
            content_repo, project.id, "tier1", "Tier 1 Article"
        )
        tier2_article = self._create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        # Create tiered link from tier 2 to tier 1
        link = link_repo.create(
            from_content_id=tier2_article.id,
            to_content_id=tier1_article.id,
            link_type="tiered",
        )
        assert link.id is not None

        # Query links from both directions
        outbound = link_repo.get_by_source_article(tier2_article.id)
        assert len(outbound) == 1
        assert outbound[0].to_content_id == tier1_article.id

        inbound = link_repo.get_by_target_article(tier1_article.id)
        assert len(inbound) == 1
        assert inbound[0].from_content_id == tier2_article.id

    def test_create_money_site_link(self, db_session):
        """Test creating external link to money site"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        link_repo = ArticleLinkRepository(db_session)

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={
                "main_keyword": "test",
                "money_site_url": "https://www.moneysite.com",
            },
        )
        tier1_article = self._create_article(
            content_repo, project.id, "tier1", "Tier 1 Article"
        )

        # Create link to money site: no target article, only a target URL
        link = link_repo.create(
            from_content_id=tier1_article.id,
            to_content_id=None,
            to_url="https://www.moneysite.com",
            link_type="tiered",
        )
        assert link.to_content_id is None
        assert link.to_url == "https://www.moneysite.com"

        # Query
        links = link_repo.get_by_source_article(tier1_article.id)
        assert len(links) == 1
        assert links[0].to_url == "https://www.moneysite.com"

    def test_multiple_link_types(self, db_session):
        """Test different link types (tiered, wheel_next, wheel_prev, homepage)"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        link_repo = ArticleLinkRepository(db_session)

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create 3 articles
        articles = [
            self._create_article(content_repo, project.id, "tier1", f"Article {i}")
            for i in range(3)
        ]

        # Create one link of each type; the returned objects are not needed
        # because every check below goes through the repository queries.
        link_repo.create(
            from_content_id=articles[0].id,
            to_content_id=articles[1].id,
            link_type="tiered",
        )
        link_repo.create(
            from_content_id=articles[0].id,
            to_content_id=articles[1].id,
            link_type="wheel_next",
        )
        link_repo.create(
            from_content_id=articles[1].id,
            to_content_id=articles[0].id,
            link_type="wheel_prev",
        )
        link_repo.create(
            from_content_id=articles[2].id,
            to_content_id=articles[0].id,
            link_type="homepage",
        )

        # Query by type: exactly one link was stored for each of the four types
        tiered_links = link_repo.get_by_link_type("tiered")
        assert len(tiered_links) == 1

        wheel_links = link_repo.get_by_link_type("wheel_next")
        assert len(wheel_links) == 1

        wheel_prev_links = link_repo.get_by_link_type("wheel_prev")
        assert len(wheel_prev_links) == 1

        homepage_links = link_repo.get_by_link_type("homepage")
        assert len(homepage_links) == 1

        # Article 0 should have multiple outbound links
        outbound = link_repo.get_by_source_article(articles[0].id)
        assert len(outbound) == 2  # tiered and wheel_next