"""
Integration tests for Story 3.2: Find Tiered Links
"""

import pytest

from src.database.models import GeneratedContent, SiteDeployment, Project, ArticleLink
from src.database.repositories import (
    SiteDeploymentRepository,
    GeneratedContentRepository,
    ProjectRepository,
    ArticleLinkRepository
)
from src.generation.job_config import Job
from src.interlinking.tiered_links import find_tiered_links


# NOTE(review): in the reviewed source the article ``content`` literals were
# split across physical lines. Those line breaks are preserved here as "\n"
# escapes so the stored text is unchanged. No assertion in this file inspects
# the body text -- confirm the intended value (it may originally have carried
# markup that was lost).
_ARTICLE_BODY = "Test\n"


def _create_test_site(site_repo, hostname):
    """Create a site deployment whose pull-zone hostname is ``hostname``.

    All other fields are fixed placeholder values shared by every test.
    """
    return site_repo.create(
        site_name="test-site",
        storage_zone_id=1,
        storage_zone_name="test",
        storage_zone_password="pass",
        storage_zone_region="DE",
        pull_zone_id=10,
        pull_zone_bcdn_hostname=hostname,
    )


def _create_article(content_repo, project_id, tier, title, *,
                    keyword="test", content=_ARTICLE_BODY, **extra):
    """Create a generated article with the boilerplate fields the tests share.

    ``extra`` is forwarded untouched to ``content_repo.create`` so that
    optional columns (e.g. ``site_deployment_id``) are only passed when a
    test supplies them explicitly.
    """
    return content_repo.create(
        project_id=project_id,
        tier=tier,
        keyword=keyword,
        title=title,
        outline={},
        content=content,
        word_count=100,
        status="generated",
        **extra,
    )


class TestTieredLinksFindingIntegration:
    """Integration tests for tiered link finding with real database"""

    def test_tier1_returns_money_site_url(self, db_session):
        """Test tier 1 batch returns money site URL from project"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        site_repo = SiteDeploymentRepository(db_session)

        # Create project with money site URL
        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={
                "main_keyword": "test keyword",
                "money_site_url": "https://www.mymoneysite.com",
            },
        )

        # Create tier 1 content (this literal had a leading line break too)
        content = _create_article(
            content_repo, project.id, "tier1", "Tier 1 Article",
            content="\nTest\n",
        )

        result = find_tiered_links([content], None, project_repo, content_repo, site_repo)

        assert result["tier"] == 1
        assert result["money_site_url"] == "https://www.mymoneysite.com"

    def test_tier2_queries_tier1_articles_same_project(self, db_session):
        """Test tier 2 batch queries tier 1 articles from same project only"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        site_repo = SiteDeploymentRepository(db_session)

        # Create site for tier 1 articles
        site = _create_test_site(site_repo, "test.b-cdn.net")

        # Create two projects
        project1 = project_repo.create(
            user_id=1,
            name="Project 1",
            data={"main_keyword": "test1"},
        )
        project2 = project_repo.create(
            user_id=1,
            name="Project 2",
            data={"main_keyword": "test2"},
        )

        # Create tier 1 articles for project 1
        for i in range(5):
            _create_article(
                content_repo, project1.id, "tier1",
                f"Project 1 Tier 1 Article {i}",
                keyword="test1",
                site_deployment_id=site.id,
            )

        # Create tier 1 articles for project 2 (should not be selected)
        for i in range(3):
            _create_article(
                content_repo, project2.id, "tier1",
                f"Project 2 Tier 1 Article {i}",
                keyword="test2",
                site_deployment_id=site.id,
            )

        # Create tier 2 article for project 1
        tier2_article = _create_article(
            content_repo, project1.id, "tier2", "Project 1 Tier 2 Article",
            keyword="test1",
        )

        result = find_tiered_links(
            [tier2_article], None, project_repo, content_repo, site_repo
        )

        assert result["tier"] == 2
        assert result["lower_tier"] == 1
        assert 2 <= len(result["lower_tier_urls"]) <= 4

        # Verify URLs are from tier 1 project 1 articles only
        for url in result["lower_tier_urls"]:
            assert "test.b-cdn.net" in url
            assert any(
                f"project-1-tier-1-article-{i}" in url.lower() for i in range(5)
            )

    def test_tier3_queries_tier2_articles(self, db_session):
        """Test tier 3 batch queries tier 2 articles"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        site_repo = SiteDeploymentRepository(db_session)

        site = _create_test_site(site_repo, "tier2site.b-cdn.net")

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create tier 2 articles
        for i in range(10):
            _create_article(
                content_repo, project.id, "tier2", f"Tier 2 Article {i}",
                site_deployment_id=site.id,
            )

        # Create tier 3 article
        tier3_article = _create_article(
            content_repo, project.id, "tier3", "Tier 3 Article"
        )

        result = find_tiered_links(
            [tier3_article], None, project_repo, content_repo, site_repo
        )

        assert result["tier"] == 3
        assert result["lower_tier"] == 2
        assert 2 <= len(result["lower_tier_urls"]) <= 4

    def test_custom_link_count_range(self, db_session):
        """Test custom link count range from job config"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        site_repo = SiteDeploymentRepository(db_session)

        site = _create_test_site(site_repo, "test.b-cdn.net")

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create 15 tier 1 articles
        for i in range(15):
            _create_article(
                content_repo, project.id, "tier1", f"Tier 1 Article {i}",
                site_deployment_id=site.id,
            )

        # Create tier 2 article
        tier2_article = _create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        # Test with custom range (min=5, max=8)
        job = Job(
            project_id=project.id,
            tiers={},
            tiered_link_count_range={"min": 5, "max": 8},
        )

        result = find_tiered_links(
            [tier2_article], job, project_repo, content_repo, site_repo
        )

        url_count = len(result["lower_tier_urls"])
        assert 5 <= url_count <= 8

    def test_exact_count_when_min_equals_max(self, db_session):
        """Test exact link count when min equals max"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        site_repo = SiteDeploymentRepository(db_session)

        site = _create_test_site(site_repo, "test.b-cdn.net")

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create 20 tier 1 articles
        for i in range(20):
            _create_article(
                content_repo, project.id, "tier1", f"Tier 1 Article {i}",
                site_deployment_id=site.id,
            )

        tier2_article = _create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        # Test with exact count (min=7, max=7)
        job = Job(
            project_id=project.id,
            tiers={},
            tiered_link_count_range={"min": 7, "max": 7},
        )

        result = find_tiered_links(
            [tier2_article], job, project_repo, content_repo, site_repo
        )

        assert len(result["lower_tier_urls"]) == 7

    def test_insufficient_lower_tier_articles_uses_all(self, db_session):
        """Test that all available articles are used when fewer than min requested"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        site_repo = SiteDeploymentRepository(db_session)

        site = _create_test_site(site_repo, "test.b-cdn.net")

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create only 1 tier 1 article
        _create_article(
            content_repo, project.id, "tier1", "Only Tier 1 Article",
            site_deployment_id=site.id,
        )

        tier2_article = _create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        result = find_tiered_links(
            [tier2_article], None, project_repo, content_repo, site_repo
        )

        # Should return the 1 available article even though min is 2
        assert len(result["lower_tier_urls"]) == 1


class TestArticleLinkRepositoryIntegration:
    """Integration tests for ArticleLink repository with database constraints"""

    def test_create_and_query_tiered_links(self, db_session):
        """Test creating and querying tiered links"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        link_repo = ArticleLinkRepository(db_session)

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create tier 1 and tier 2 articles
        tier1_article = _create_article(
            content_repo, project.id, "tier1", "Tier 1 Article"
        )
        tier2_article = _create_article(
            content_repo, project.id, "tier2", "Tier 2 Article"
        )

        # Create tiered link from tier 2 to tier 1
        link = link_repo.create(
            from_content_id=tier2_article.id,
            to_content_id=tier1_article.id,
            link_type="tiered",
        )

        assert link.id is not None

        # Query links in both directions
        outbound = link_repo.get_by_source_article(tier2_article.id)
        assert len(outbound) == 1
        assert outbound[0].to_content_id == tier1_article.id

        inbound = link_repo.get_by_target_article(tier1_article.id)
        assert len(inbound) == 1
        assert inbound[0].from_content_id == tier2_article.id

    def test_create_money_site_link(self, db_session):
        """Test creating external link to money site"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        link_repo = ArticleLinkRepository(db_session)

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={
                "main_keyword": "test",
                "money_site_url": "https://www.moneysite.com",
            },
        )

        tier1_article = _create_article(
            content_repo, project.id, "tier1", "Tier 1 Article"
        )

        # Create link to money site: no target article, only a target URL
        link = link_repo.create(
            from_content_id=tier1_article.id,
            to_content_id=None,
            to_url="https://www.moneysite.com",
            link_type="tiered",
        )

        assert link.to_content_id is None
        assert link.to_url == "https://www.moneysite.com"

        # Query
        links = link_repo.get_by_source_article(tier1_article.id)
        assert len(links) == 1
        assert links[0].to_url == "https://www.moneysite.com"

    def test_multiple_link_types(self, db_session):
        """Test different link types (tiered, wheel_next, wheel_prev, homepage)"""
        project_repo = ProjectRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)
        link_repo = ArticleLinkRepository(db_session)

        project = project_repo.create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        # Create 3 articles
        articles = [
            _create_article(content_repo, project.id, "tier1", f"Article {i}")
            for i in range(3)
        ]

        # Create different link types; the returned rows are not needed
        # because the assertions below re-query the repository.
        link_repo.create(
            from_content_id=articles[0].id,
            to_content_id=articles[1].id,
            link_type="tiered",
        )
        link_repo.create(
            from_content_id=articles[0].id,
            to_content_id=articles[1].id,
            link_type="wheel_next",
        )
        link_repo.create(
            from_content_id=articles[1].id,
            to_content_id=articles[0].id,
            link_type="wheel_prev",
        )
        link_repo.create(
            from_content_id=articles[2].id,
            to_content_id=articles[0].id,
            link_type="homepage",
        )

        # Query by type
        tiered_links = link_repo.get_by_link_type("tiered")
        assert len(tiered_links) == 1

        wheel_links = link_repo.get_by_link_type("wheel_next")
        assert len(wheel_links) == 1

        # Article 0 should have multiple outbound links
        outbound = link_repo.get_by_source_article(articles[0].id)
        assert len(outbound) == 2  # tiered and wheel_next