Big-Link-Man/tests/integration/test_content_injection_inte...

491 lines
18 KiB
Python

"""
Integration tests for content injection
Tests full flow with database
"""
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from src.database.models import Base, User, Project, SiteDeployment, GeneratedContent, ArticleLink
from src.database.repositories import (
ProjectRepository,
GeneratedContentRepository,
SiteDeploymentRepository,
ArticleLinkRepository
)
from src.interlinking.content_injection import inject_interlinks
from src.generation.url_generator import generate_urls_for_batch
from src.interlinking.tiered_links import find_tiered_links
@pytest.fixture
def db_session():
    """Yield a SQLAlchemy session backed by a fresh in-memory SQLite database.

    All tables declared on ``Base.metadata`` are created before the session
    is handed to the test. On teardown the session is closed and the engine
    is disposed so its pooled connections are released.
    """
    engine = create_engine('sqlite:///:memory:')
    Base.metadata.create_all(engine)
    session = sessionmaker(bind=engine)()
    try:
        yield session
    finally:
        # Close the session first, then release the engine's connections,
        # so nothing from this test lingers into the next one.
        session.close()
        engine.dispose()
@pytest.fixture
def user(db_session):
    """Persist a ``User`` with role "Admin" and return the stored row."""
    fields = {
        "username": "testuser",
        "hashed_password": "hashed_pwd",
        "role": "Admin",
    }
    account = User(**fields)
    db_session.add(account)
    db_session.commit()
    # Reload from the database so attributes reflect the committed row.
    db_session.refresh(account)
    return account
@pytest.fixture
def project(db_session, user):
    """Persist a tier 1 ``Project`` owned by ``user`` and return it.

    The project targets the keyword "shaft machining" and the money site
    ``https://moneysite.com``; several tests assert against those values.
    """
    fields = {
        "user_id": user.id,
        "name": "Test Project",
        "main_keyword": "shaft machining",
        "tier": 1,
        "money_site_url": "https://moneysite.com",
        "related_searches": ["cnc machining", "precision machining"],
        "entities": ["lathe", "mill", "CNC"],
    }
    record = Project(**fields)
    db_session.add(record)
    db_session.commit()
    # Reload from the database so attributes reflect the committed row.
    db_session.refresh(record)
    return record
@pytest.fixture
def site_deployment(db_session):
    """Persist a ``SiteDeployment`` for ``www.testsite.com`` and return it."""
    fields = {
        "site_name": "Test Site",
        "custom_hostname": "www.testsite.com",
        "storage_zone_id": 123,
        "storage_zone_name": "test-zone",
        "storage_zone_password": "test-pass",
        "storage_zone_region": "NY",
        "pull_zone_id": 456,
        "pull_zone_bcdn_hostname": "testsite.b-cdn.net",
    }
    deployment = SiteDeployment(**fields)
    db_session.add(deployment)
    db_session.commit()
    # Reload from the database so attributes reflect the committed row.
    db_session.refresh(deployment)
    return deployment
@pytest.fixture
def content_repo(db_session):
    """Provide a ``GeneratedContentRepository`` bound to the test session."""
    repository = GeneratedContentRepository(db_session)
    return repository
@pytest.fixture
def project_repo(db_session):
    """Provide a ``ProjectRepository`` bound to the test session."""
    repository = ProjectRepository(db_session)
    return repository
@pytest.fixture
def site_repo(db_session):
    """Provide a ``SiteDeploymentRepository`` bound to the test session."""
    repository = SiteDeploymentRepository(db_session)
    return repository
@pytest.fixture
def link_repo(db_session):
    """Provide an ``ArticleLinkRepository`` bound to the test session."""
    repository = ArticleLinkRepository(db_session)
    return repository
class TestTier1ContentInjection:
    """End-to-end checks of interlink injection for tier 1 batches."""

    def test_tier1_batch_with_money_site_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """A three-article T1 batch gets money-site links and a See Also section."""
        # Build the batch: three tier 1 articles on the same site deployment.
        batch = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"keyword_{n}",
                title=f"Article {n} About Shaft Machining",
                outline={"sections": ["intro", "body"]},
                content=f"<p>This is article {n} about shaft machining and Home page. Learn about shaft machining here.</p>",
                word_count=50,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for n in range(3)
        ]

        urls = generate_urls_for_batch(batch, site_repo)

        # This run uses no job-level configuration.
        job_config = None
        link_plan = find_tiered_links(batch, job_config, project_repo, content_repo, site_repo)
        assert link_plan['tier'] == 1
        assert link_plan['money_site_url'] == "https://moneysite.com"

        inject_interlinks(batch, urls, link_plan, project, job_config, content_repo, link_repo)

        for current in batch:
            # Re-read the row so the assertions see the stored content.
            db_session.refresh(current)

            # Money site link and See Also markup must be present.
            assert '<a href="https://moneysite.com">' in current.content
            assert "<h3>See Also</h3>" in current.content
            assert "<ul>" in current.content

            # Every other article of the batch is referenced by title.
            siblings = [candidate for candidate in batch if candidate.id != current.id]
            for sibling in siblings:
                assert sibling.title in current.content

            # Link records: 1 tiered (money site) + 2 wheel_see_also at minimum.
            outbound = link_repo.get_by_source_article(current.id)
            assert len(outbound) >= 3

            money_links = [link for link in outbound if link.link_type == "tiered"]
            assert len(money_links) == 1
            assert money_links[0].to_url == "https://moneysite.com"

            see_also = [link for link in outbound if link.link_type == "wheel_see_also"]
            assert len(see_also) == 2

    def test_tier1_with_homepage_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """A single T1 article containing "Home" gets a homepage link."""
        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test_keyword",
            title="Test Article",
            outline={"sections": []},
            content="<p>Content about shaft machining and processes Home today.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )
        batch = [article]

        urls = generate_urls_for_batch(batch, site_repo)
        link_plan = find_tiered_links(batch, None, project_repo, content_repo, site_repo)
        inject_interlinks(batch, urls, link_plan, project, None, content_repo, link_repo)

        db_session.refresh(article)

        # "Home" must have become the anchor of a link ending in index.html.
        assert '<a href=' in article.content
        assert 'Home</a>' in article.content
        assert 'index.html">Home</a>' in article.content

        # The homepage link must also be recorded in the database.
        outbound = link_repo.get_by_source_article(article.id)
        homepage = [link for link in outbound if link.link_type == "homepage"]
        assert len(homepage) >= 1
class TestTier2ContentInjection:
    """Integration coverage for tier 2 batches that link down to tier 1."""

    def test_tier2_links_to_tier1(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """T2 articles receive tiered links and a See Also section."""
        # Five tier 1 articles are stored first; only the T2 batch below is
        # passed to the linking functions.
        lower_tier_pool = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"t1_keyword_{n}",
                title=f"T1 Article {n}",
                outline={"sections": []},
                content=f"<p>T1 article {n} content about shaft machining.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for n in range(5)
        ]

        # Three tier 2 articles make up the batch under test.
        batch = [
            content_repo.create(
                project_id=project.id,
                tier="tier2",
                keyword=f"t2_keyword_{n}",
                title=f"T2 Article {n}",
                outline={"sections": []},
                content=f"<p>T2 article {n} with cnc machining and precision machining content here.</p>",
                word_count=40,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for n in range(3)
        ]

        urls = generate_urls_for_batch(batch, site_repo)

        link_plan = find_tiered_links(batch, None, project_repo, content_repo, site_repo)
        assert link_plan['tier'] == 2
        assert link_plan['lower_tier'] == 1
        # At least two lower-tier URLs are expected (original note: 2-4 random T1 URLs).
        assert len(link_plan['lower_tier_urls']) >= 2

        inject_interlinks(batch, urls, link_plan, project, None, content_repo, link_repo)

        for current in batch:
            db_session.refresh(current)

            # Links and the See Also block must have been written into the body.
            assert '<a href=' in current.content
            assert "<h3>See Also</h3>" in current.content

            outbound = link_repo.get_by_source_article(current.id)
            down_links = [link for link in outbound if link.link_type == "tiered"]
            assert len(down_links) >= 2  # At least 2 links to T1

            # NOTE(review): this only checks that a URL is set, not that the
            # URL belongs to a T1 article - consider tightening.
            for link in down_links:
                assert link.to_url is not None  # External URL
class TestAnchorTextConfigOverrides:
    """Integration tests for the ``anchor_text_config`` job setting."""

    @staticmethod
    def _inject_single(article, job_config, project, content_repo, project_repo, site_repo, link_repo):
        """Run URL generation, tiered-link lookup and injection for one article.

        The tiered-link lookup is made without a job config; only the
        injection step receives ``job_config``.
        """
        batch = [article]
        urls = generate_urls_for_batch(batch, site_repo)
        link_plan = find_tiered_links(batch, None, project_repo, content_repo, site_repo)
        inject_interlinks(batch, urls, link_plan, project, job_config, content_repo, link_repo)

    def test_override_mode(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Injection with ``mode: override`` still produces a link."""
        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Test Article",
            outline={},
            content="<p>Content with custom anchor and click here for more info text.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )

        # Both custom anchors appear verbatim in the article body above.
        override_config = {
            "anchor_text_config": {
                "mode": "override",
                "custom_text": ["custom anchor", "click here for more info"],
            }
        }
        self._inject_single(
            article, override_config, project, content_repo, project_repo, site_repo, link_repo
        )

        db_session.refresh(article)

        # Only the presence of a link is verified, not which anchor was used.
        assert '<a href=' in article.content

    def test_append_mode(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Injection with ``mode: append`` still produces a link."""
        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Test",
            outline={},
            content="<p>Article about shaft machining with custom content here.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )

        append_config = {
            "anchor_text_config": {
                "mode": "append",
                "custom_text": ["custom content"],
            }
        }
        self._inject_single(
            article, append_config, project, content_repo, project_repo, site_repo, link_repo
        )

        db_session.refresh(article)
        assert '<a href=' in article.content
class TestDifferentBatchSizes:
    """Injection behaviour at the batch-size extremes used here: 1 and 20."""

    def test_single_article_batch(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """A one-article batch still gets money-site and homepage links."""
        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Single Article",
            outline={},
            content="<p>Content about shaft machining and Home information.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )
        batch = [article]

        urls = generate_urls_for_batch(batch, site_repo)
        link_plan = find_tiered_links(batch, None, project_repo, content_repo, site_repo)
        inject_interlinks(batch, urls, link_plan, project, None, content_repo, link_repo)

        db_session.refresh(article)

        # Money site link (the body contains the "shaft machining" keyword).
        assert '<a href="https://moneysite.com">' in article.content
        # Homepage link anchored on "Home" and ending in index.html.
        assert 'index.html">Home</a>' in article.content

    def test_large_batch(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """In a 20-article batch the first article links to the other 19."""
        batch = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"kw_{n}",
                title=f"Article {n}",
                outline={},
                content=f"<p>Article {n} about shaft machining processes.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for n in range(20)
        ]

        urls = generate_urls_for_batch(batch, site_repo)
        link_plan = find_tiered_links(batch, None, project_repo, content_repo, site_repo)
        inject_interlinks(batch, urls, link_plan, project, None, content_repo, link_repo)

        # Only the first article is inspected.
        head = batch[0]
        db_session.refresh(head)
        assert "<h3>See Also</h3>" in head.content

        outbound = link_repo.get_by_source_article(head.id)
        see_also = [link for link in outbound if link.link_type == "wheel_see_also"]
        assert len(see_also) == 19
class TestLinkDatabaseRecords:
    """Checks on the ``ArticleLink`` rows written during injection."""

    def test_all_link_types_recorded(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Tiered and See Also links are recorded for a three-article batch."""
        articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"kw_{i}",
                title=f"Article {i}",
                outline={},
                content=f"<p>Content {i} about shaft machining here.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(3)
        ]

        article_urls = generate_urls_for_batch(articles, site_repo)
        tiered_links = find_tiered_links(articles, None, project_repo, content_repo, site_repo)
        inject_interlinks(articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        all_tiered = link_repo.get_by_link_type("tiered")
        # The homepage query is exercised but its result is not asserted on.
        # NOTE(review): the article bodies here contain no "Home" text, which
        # the other tests in this file use as the homepage anchor - confirm
        # whether a homepage link is expected before adding a count check.
        link_repo.get_by_link_type("homepage")
        all_see_also = link_repo.get_by_link_type("wheel_see_also")

        assert len(all_tiered) >= 3  # At least 1 per article
        assert len(all_see_also) >= 6  # Each article links to 2 others

    def test_internal_vs_external_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Tiered links carry ``to_url``; See Also links carry ``to_content_id``."""
        t1_articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"t1_{i}",
                title=f"T1 Article {i}",
                outline={},
                content=f"<p>T1 content {i} about shaft machining.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(2)
        ]

        article_urls = generate_urls_for_batch(t1_articles, site_repo)
        tiered_links = find_tiered_links(t1_articles, None, project_repo, content_repo, site_repo)
        inject_interlinks(t1_articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Only the first article's outbound links are inspected.
        outbound = link_repo.get_by_source_article(t1_articles[0].id)

        # The tiered link (to the money site) is external: URL set, no content id.
        tiered = [link for link in outbound if link.link_type == "tiered"]
        assert len(tiered) >= 1
        assert tiered[0].to_url is not None
        assert tiered[0].to_content_id is None

        # See Also links are internal and must point at an article of this batch.
        # The id set is built once instead of once per link.
        batch_ids = {article.id for article in t1_articles}
        see_also = [link for link in outbound if link.link_type == "wheel_see_also"]
        for link in see_also:
            assert link.to_content_id is not None
            assert link.to_content_id in batch_ids