"""
Integration tests for content injection
Tests full flow with database
"""

# NOTE(review): this module was recovered from a copy whose line breaks had
# been collapsed and whose HTML markup had been stripped out of the string
# literals. Formatting has been restored and every surviving token kept.
# Spans that had to be reconstructed are each marked with a NOTE(review)
# comment -- confirm them against version control before relying on them.

import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from src.database.models import Base, User, Project, SiteDeployment, GeneratedContent, ArticleLink
from src.database.repositories import (
    ProjectRepository,
    GeneratedContentRepository,
    SiteDeploymentRepository,
    ArticleLinkRepository
)
from src.interlinking.content_injection import inject_interlinks
from src.generation.url_generator import generate_urls_for_batch
from src.interlinking.tiered_links import find_tiered_links


@pytest.fixture
def db_session():
    """Create an in-memory SQLite database for testing"""
    engine = create_engine('sqlite:///:memory:')
    Base.metadata.create_all(engine)
    Session = sessionmaker(bind=engine)
    session = Session()
    yield session
    # Teardown: runs after the test finishes, whether it passed or failed.
    session.close()
    engine.dispose()


@pytest.fixture
def user(db_session):
    """Create a test user"""
    user = User(
        username="testuser",
        hashed_password="hashed_pwd",
        role="Admin"
    )
    db_session.add(user)
    db_session.commit()
    db_session.refresh(user)
    return user


@pytest.fixture
def project(db_session, user):
    """Create a test project"""
    project = Project(
        user_id=user.id,
        name="Test Project",
        main_keyword="shaft machining",
        tier=1,
        money_site_url="https://moneysite.com",
        related_searches=["cnc machining", "precision machining"],
        entities=["lathe", "mill", "CNC"]
    )
    db_session.add(project)
    db_session.commit()
    db_session.refresh(project)
    return project


@pytest.fixture
def site_deployment(db_session):
    """Create a test site deployment"""
    site = SiteDeployment(
        site_name="Test Site",
        custom_hostname="www.testsite.com",
        storage_zone_id=123,
        storage_zone_name="test-zone",
        storage_zone_password="test-pass",
        storage_zone_region="NY",
        pull_zone_id=456,
        pull_zone_bcdn_hostname="testsite.b-cdn.net"
    )
    db_session.add(site)
    db_session.commit()
    db_session.refresh(site)
    return site


@pytest.fixture
def content_repo(db_session):
    """Return a GeneratedContentRepository bound to the test session"""
    return GeneratedContentRepository(db_session)


@pytest.fixture
def project_repo(db_session):
    """Return a ProjectRepository bound to the test session"""
    return ProjectRepository(db_session)


@pytest.fixture
def site_repo(db_session):
    """Return a SiteDeploymentRepository bound to the test session"""
    return SiteDeploymentRepository(db_session)


@pytest.fixture
def link_repo(db_session):
    """Return an ArticleLinkRepository bound to the test session"""
    return ArticleLinkRepository(db_session)


class TestTier1ContentInjection:
    """Integration tests for Tier 1 content injection"""

    def test_tier1_batch_with_money_site_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Test full flow: create T1 articles, inject money site links, See Also section"""
        # Create 3 tier1 articles
        articles = []
        for i in range(3):
            content = content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"keyword_{i}",
                title=f"Article {i} About Shaft Machining",
                outline={"sections": ["intro", "body"]},
                # NOTE(review): the <p> wrapper is reconstructed; the markup
                # of this literal was stripped from the recovered source.
                content=f"<p>This is article {i} about shaft machining and Home page. Learn about shaft machining here.</p>",
                word_count=50,
                status="generated",
                site_deployment_id=site_deployment.id
            )
            articles.append(content)

        # Generate URLs
        article_urls = generate_urls_for_batch(articles, site_repo)

        # Find tiered links
        job_config = None
        tiered_links = find_tiered_links(articles, job_config, project_repo, content_repo, site_repo)

        assert tiered_links['tier'] == 1
        assert tiered_links['money_site_url'] == "https://moneysite.com"

        # Inject interlinks
        inject_interlinks(articles, article_urls, tiered_links, project, job_config, content_repo, link_repo)

        # Verify each article
        for i, article in enumerate(articles):
            db_session.refresh(article)

            # Should have money site link
            # NOTE(review): the recovered assertion was `'' in article.content`
            # (always true once the markup was stripped). Checking for the
            # money-site URL is a reconstruction -- confirm the exact markup.
            assert "https://moneysite.com" in article.content

            # Should have See Also section
            # NOTE(review): the original literal was lost after `assert "`;
            # any further assertions in this loop were lost with it.
            assert "See Also" in article.content

    # NOTE(review): this test's name, signature and the keyword/title/outline
    # arguments below are reconstructed; the recovered source resumes inside
    # the `content=` literal.
    def test_tier1_with_homepage_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Test that a T1 article gets a homepage link recorded and injected"""
        content = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="homepage_keyword",
            title="Homepage Link Article",
            outline={"sections": []},
            content="<p>Content about shaft machining and processes Home today.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id
        )

        # Generate URL
        article_urls = generate_urls_for_batch([content], site_repo)

        # Find tiered links
        tiered_links = find_tiered_links([content], None, project_repo, content_repo, site_repo)

        # Inject interlinks
        inject_interlinks([content], article_urls, tiered_links, project, None, content_repo, link_repo)

        db_session.refresh(content)

        # Should have homepage link with "Home" as anchor text to /index.html
        # NOTE(review): first literal reconstructed from the surviving
        # `index.html">Home` assertion (double-quoted href).
        assert '<a href="' in content.content
        assert 'index.html">Home' in content.content

        # Check homepage link in database
        outbound_links = link_repo.get_by_source_article(content.id)
        homepage_links = [link for link in outbound_links if link.link_type == "homepage"]
        assert len(homepage_links) >= 1


class TestTier2ContentInjection:
    """Integration tests for Tier 2 content injection"""

    def test_tier2_links_to_tier1(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Test T2 articles linking to T1 articles"""
        # Create 5 tier1 articles
        t1_articles = []
        for i in range(5):
            content = content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"t1_keyword_{i}",
                title=f"T1 Article {i}",
                outline={"sections": []},
                # NOTE(review): <p> wrapper reconstructed (markup was stripped).
                content=f"<p>T1 article {i} content about shaft machining.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id
            )
            t1_articles.append(content)

        # Create 3 tier2 articles
        t2_articles = []
        for i in range(3):
            content = content_repo.create(
                project_id=project.id,
                tier="tier2",
                keyword=f"t2_keyword_{i}",
                title=f"T2 Article {i}",
                outline={"sections": []},
                # NOTE(review): <p> wrapper reconstructed (markup was stripped).
                content=f"<p>T2 article {i} with cnc machining and precision machining content here.</p>",
                word_count=40,
                status="generated",
                site_deployment_id=site_deployment.id
            )
            t2_articles.append(content)

        # Generate URLs for T2 articles
        article_urls = generate_urls_for_batch(t2_articles, site_repo)

        # Find tiered links for T2
        tiered_links = find_tiered_links(t2_articles, None, project_repo, content_repo, site_repo)

        assert tiered_links['tier'] == 2
        assert tiered_links['lower_tier'] == 1
        assert len(tiered_links['lower_tier_urls']) >= 2  # Should select 2-4 random T1 URLs

        # Inject interlinks
        inject_interlinks(t2_articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Verify T2 articles
        for article in t2_articles:
            db_session.refresh(article)

            # Should have links to T1 articles
            # NOTE(review): literal reconstructed; the source is truncated
            # right after `assert '`.
            assert '<a href="' in article.content

        # NOTE(review): an unknown amount of code is missing at this point.
        # The only surviving residue of the lost span is shown below; it
        # refers to a `content` object whose setup was lost, so it is
        # preserved as a comment rather than guessed at:
        #
        #     assert '' in content.content
        #     # Should have homepage link (using "Home" anchor to /index.html)
        #     assert 'index.html">Home' in content.content


# NOTE(review): the class header that originally enclosed `test_large_batch`
# was lost; this class name and docstring are placeholders.
class TestLargeBatchInjection:
    """Integration tests for injecting links into a large batch"""

    def test_large_batch(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Test batch with 20 articles"""
        articles = []
        for i in range(20):
            content = content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"kw_{i}",
                title=f"Article {i}",
                outline={},
                # NOTE(review): <p> wrapper reconstructed (markup was stripped).
                content=f"<p>Article {i} about shaft machining processes.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id
            )
            articles.append(content)

        article_urls = generate_urls_for_batch(articles, site_repo)
        tiered_links = find_tiered_links(articles, None, project_repo, content_repo, site_repo)
        inject_interlinks(articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Verify first article has 19 See Also links
        first_article = articles[0]
        db_session.refresh(first_article)
        # NOTE(review): the source is truncated after `assert "`; the check
        # that counted the 19 links was lost and has not been re-invented.
        assert "See Also" in first_article.content


# NOTE(review): the class header that originally enclosed the tests below
# was lost; this class name and docstring are placeholders.
class TestLinkDatabaseRecords:
    """Integration tests for the link records written during injection"""

    # NOTE(review): this test's name, signature, loop bound and the
    # keyword/title/outline arguments are reconstructed. The batch size of 3
    # follows from the surviving assertions (>= 3 tiered links "at least 1
    # per article", >= 6 See Also links "each article links to 2 others").
    def test_all_link_types_recorded(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Test that every link type is recorded for a batch"""
        articles = []
        for i in range(3):
            content = content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"kw_{i}",
                title=f"Article {i}",
                outline={},
                content=f"<p>Content {i} about shaft machining here.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id
            )
            articles.append(content)

        article_urls = generate_urls_for_batch(articles, site_repo)
        tiered_links = find_tiered_links(articles, None, project_repo, content_repo, site_repo)
        inject_interlinks(articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Check all link types exist
        all_tiered = link_repo.get_by_link_type("tiered")
        # Fetched but not asserted on in the recovered source; kept so the
        # "homepage" lookup is still exercised.
        all_homepage = link_repo.get_by_link_type("homepage")
        all_see_also = link_repo.get_by_link_type("wheel_see_also")

        assert len(all_tiered) >= 3  # At least 1 per article
        assert len(all_see_also) >= 6  # Each article links to 2 others

    def test_internal_vs_external_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Test internal (to_content_id) vs external (to_url) links"""
        # Create T1 articles
        t1_articles = []
        for i in range(2):
            content = content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"t1_{i}",
                title=f"T1 Article {i}",
                outline={},
                # NOTE(review): <p> wrapper reconstructed (markup was stripped).
                content=f"<p>T1 content {i} about shaft machining.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id
            )
            t1_articles.append(content)

        article_urls = generate_urls_for_batch(t1_articles, site_repo)
        tiered_links = find_tiered_links(t1_articles, None, project_repo, content_repo, site_repo)
        inject_interlinks(t1_articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Check links for first article
        outbound = link_repo.get_by_source_article(t1_articles[0].id)

        # Tiered link (to money site) should have to_url, not to_content_id
        tiered = [link for link in outbound if link.link_type == "tiered"]
        assert len(tiered) >= 1
        assert tiered[0].to_url is not None
        assert tiered[0].to_content_id is None

        # See Also links should have to_content_id
        see_also = [link for link in outbound if link.link_type == "wheel_see_also"]
        # Build the id set once instead of rebuilding a list per link.
        t1_ids = {a.id for a in t1_articles}
        for link in see_also:
            assert link.to_content_id is not None
            assert link.to_content_id in t1_ids