491 lines
18 KiB
Python
491 lines
18 KiB
Python
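"""
Integration tests for content injection.

Exercises the full flow against an in-memory SQLite database: URL
generation, tiered-link lookup, interlink injection, and the ArticleLink
records that result.
"""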
|
|
|
|
import pytest
|
|
from sqlalchemy import create_engine
|
|
from sqlalchemy.orm import sessionmaker
|
|
from src.database.models import Base, User, Project, SiteDeployment, GeneratedContent, ArticleLink
|
|
from src.database.repositories import (
|
|
ProjectRepository,
|
|
GeneratedContentRepository,
|
|
SiteDeploymentRepository,
|
|
ArticleLinkRepository
|
|
)
|
|
from src.interlinking.content_injection import inject_interlinks
|
|
from src.generation.url_generator import generate_urls_for_batch
|
|
from src.interlinking.tiered_links import find_tiered_links
|
|
|
|
|
|
@pytest.fixture
def db_session():
    """Yield a SQLAlchemy session bound to a fresh in-memory SQLite database.

    All tables registered on ``Base.metadata`` are created before the session
    is handed to the test.

    Yields:
        A session created by ``sessionmaker(bind=engine)``.

    Teardown closes the session and then disposes the engine, so the
    connection pool is released deterministically instead of lingering until
    garbage collection.
    """
    engine = create_engine('sqlite:///:memory:')
    try:
        Base.metadata.create_all(engine)
        Session = sessionmaker(bind=engine)
        session = Session()
        try:
            yield session
        finally:
            session.close()
    finally:
        # Dispose even if table creation failed, so no pooled connection leaks.
        engine.dispose()
|
|
|
|
|
|
@pytest.fixture
def user(db_session):
    """Persist a test user with the ``Admin`` role and return it."""
    account = User(username="testuser", hashed_password="hashed_pwd", role="Admin")
    db_session.add(account)
    db_session.commit()
    # Reload the row after the commit; the ``project`` fixture reads ``id`` from it.
    db_session.refresh(account)
    return account
|
|
|
|
|
|
@pytest.fixture
def project(db_session, user):
    """Persist a tier-1 test project owned by the ``user`` fixture and return it."""
    attributes = dict(
        user_id=user.id,
        name="Test Project",
        main_keyword="shaft machining",
        tier=1,
        money_site_url="https://moneysite.com",
        related_searches=["cnc machining", "precision machining"],
        entities=["lathe", "mill", "CNC"],
    )
    record = Project(**attributes)
    db_session.add(record)
    db_session.commit()
    # Reload the row after the commit; the tests read ``id`` from it.
    db_session.refresh(record)
    return record
|
|
|
|
|
|
@pytest.fixture
def site_deployment(db_session):
    """Persist a test site deployment and return it."""
    attributes = dict(
        site_name="Test Site",
        custom_hostname="www.testsite.com",
        storage_zone_id=123,
        storage_zone_name="test-zone",
        storage_zone_password="test-pass",
        storage_zone_region="NY",
        pull_zone_id=456,
        pull_zone_bcdn_hostname="testsite.b-cdn.net",
    )
    deployment = SiteDeployment(**attributes)
    db_session.add(deployment)
    db_session.commit()
    # Reload the row after the commit; the tests read ``id`` from it.
    db_session.refresh(deployment)
    return deployment
|
|
|
|
|
|
@pytest.fixture
def content_repo(db_session):
    """Return a ``GeneratedContentRepository`` built on the test session."""
    repository = GeneratedContentRepository(db_session)
    return repository
|
|
|
|
|
|
@pytest.fixture
def project_repo(db_session):
    """Return a ``ProjectRepository`` built on the test session."""
    repository = ProjectRepository(db_session)
    return repository
|
|
|
|
|
|
@pytest.fixture
def site_repo(db_session):
    """Return a ``SiteDeploymentRepository`` built on the test session."""
    repository = SiteDeploymentRepository(db_session)
    return repository
|
|
|
|
|
|
@pytest.fixture
def link_repo(db_session):
    """Return an ``ArticleLinkRepository`` built on the test session."""
    repository = ArticleLinkRepository(db_session)
    return repository
|
|
|
|
|
|
class TestTier1ContentInjection:
    """Integration tests for Tier 1 content injection."""

    def test_tier1_batch_with_money_site_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Run the full flow on three T1 articles and check the injected links.

        Each article must end up with a money-site anchor, a See Also list
        naming the other two articles, and matching ArticleLink rows.
        """
        # Batch of three tier1 articles, all on the same site deployment.
        articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"keyword_{i}",
                title=f"Article {i} About Shaft Machining",
                outline={"sections": ["intro", "body"]},
                content=f"<p>This is article {i} about shaft machining and Home page. Learn about shaft machining here.</p>",
                word_count=50,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(3)
        ]

        job_config = None
        article_urls = generate_urls_for_batch(articles, site_repo)
        tiered_links = find_tiered_links(articles, job_config, project_repo, content_repo, site_repo)

        assert tiered_links['tier'] == 1
        assert tiered_links['money_site_url'] == "https://moneysite.com"

        inject_interlinks(articles, article_urls, tiered_links, project, job_config, content_repo, link_repo)

        for article in articles:
            # Pick up whatever the injection step wrote back for this row.
            db_session.refresh(article)
            body = article.content

            # Money site anchor
            assert '<a href="https://moneysite.com">' in body

            # See Also section rendered as a list
            assert "<h3>See Also</h3>" in body
            assert "<ul>" in body

            # Every other article in the batch is mentioned by title
            for sibling in articles:
                if sibling.id != article.id:
                    assert sibling.title in body

            # ArticleLink rows: 1 tiered (money site) + 2 wheel_see_also at minimum
            outbound_links = link_repo.get_by_source_article(article.id)
            assert len(outbound_links) >= 3

            money_site_links = [link for link in outbound_links if link.link_type == "tiered"]
            assert len(money_site_links) == 1
            assert money_site_links[0].to_url == "https://moneysite.com"

            see_also_links = [link for link in outbound_links if link.link_type == "wheel_see_also"]
            assert len(see_also_links) == 2

    def test_tier1_with_homepage_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Check that a single T1 article receives a "Home" link to index.html."""
        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test_keyword",
            title="Test Article",
            outline={"sections": []},
            content="<p>Content about shaft machining and processes Home today.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )

        article_urls = generate_urls_for_batch([article], site_repo)
        tiered_links = find_tiered_links([article], None, project_repo, content_repo, site_repo)
        inject_interlinks([article], article_urls, tiered_links, project, None, content_repo, link_repo)

        db_session.refresh(article)

        # "Home" must be wrapped in an anchor that targets index.html
        assert '<a href=' in article.content
        assert 'Home</a>' in article.content
        assert 'index.html">Home</a>' in article.content

        # At least one homepage link row is stored for the article
        outbound_links = link_repo.get_by_source_article(article.id)
        assert any(link.link_type == "homepage" for link in outbound_links)
|
|
|
|
|
|
class TestTier2ContentInjection:
    """Integration tests for Tier 2 content injection."""

    def test_tier2_links_to_tier1(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Check that a T2 batch is linked to previously created T1 articles."""
        # Five tier1 articles act as link targets; they are created first.
        t1_articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"t1_keyword_{i}",
                title=f"T1 Article {i}",
                outline={"sections": []},
                content=f"<p>T1 article {i} content about shaft machining.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(5)
        ]

        # Three tier2 articles form the batch under test.
        t2_articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier2",
                keyword=f"t2_keyword_{i}",
                title=f"T2 Article {i}",
                outline={"sections": []},
                content=f"<p>T2 article {i} with cnc machining and precision machining content here.</p>",
                word_count=40,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(3)
        ]

        # Only the T2 batch goes through URL generation and link lookup.
        article_urls = generate_urls_for_batch(t2_articles, site_repo)
        tiered_links = find_tiered_links(t2_articles, None, project_repo, content_repo, site_repo)

        assert tiered_links['tier'] == 2
        assert tiered_links['lower_tier'] == 1
        assert len(tiered_links['lower_tier_urls']) >= 2  # Should select 2-4 random T1 URLs

        inject_interlinks(t2_articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        for article in t2_articles:
            db_session.refresh(article)

            # Some anchor was injected, plus the See Also heading
            assert '<a href=' in article.content
            assert "<h3>See Also</h3>" in article.content

            outbound_links = link_repo.get_by_source_article(article.id)

            # At least two tiered links per T2 article
            links_to_tier1 = [link for link in outbound_links if link.link_type == "tiered"]
            assert len(links_to_tier1) >= 2

            # NOTE(review): this only proves the tiered links carry an external
            # URL; it does not verify the URL belongs to a T1 article.
            assert all(link.to_url is not None for link in links_to_tier1)
|
|
|
|
|
|
class TestAnchorTextConfigOverrides:
    """Integration tests for anchor text config overrides."""

    def test_override_mode(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Run injection with ``anchor_text_config`` in "override" mode."""
        # Both custom phrases appear verbatim in the article body below.
        job_config = {
            "anchor_text_config": {
                "mode": "override",
                "custom_text": ["custom anchor", "click here for more info"],
            }
        }

        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Test Article",
            outline={},
            content="<p>Content with custom anchor and click here for more info text.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )

        # URL generation and link lookup run without a job config;
        # only the injection step receives the override.
        article_urls = generate_urls_for_batch([article], site_repo)
        tiered_links = find_tiered_links([article], None, project_repo, content_repo, site_repo)

        inject_interlinks([article], article_urls, tiered_links, project, job_config, content_repo, link_repo)

        db_session.refresh(article)

        # NOTE(review): this only checks that some anchor was injected, not
        # that one of the custom phrases was used as its text.
        assert '<a href=' in article.content

    def test_append_mode(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Run injection with ``anchor_text_config`` in "append" mode."""
        # The custom phrase appears verbatim in the article body below.
        job_config = {
            "anchor_text_config": {
                "mode": "append",
                "custom_text": ["custom content"],
            }
        }

        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Test",
            outline={},
            content="<p>Article about shaft machining with custom content here.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )

        article_urls = generate_urls_for_batch([article], site_repo)
        tiered_links = find_tiered_links([article], None, project_repo, content_repo, site_repo)

        inject_interlinks([article], article_urls, tiered_links, project, job_config, content_repo, link_repo)

        db_session.refresh(article)

        # NOTE(review): as in the override test, only the presence of an
        # anchor is checked, not which text it wraps.
        assert '<a href=' in article.content
|
|
|
|
|
|
class TestDifferentBatchSizes:
    """Test with various batch sizes."""

    def test_single_article_batch(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Check that a one-article batch still gets money-site and homepage links."""
        article = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Single Article",
            outline={},
            content="<p>Content about shaft machining and Home information.</p>",
            word_count=30,
            status="generated",
            site_deployment_id=site_deployment.id,
        )

        article_urls = generate_urls_for_batch([article], site_repo)
        tiered_links = find_tiered_links([article], None, project_repo, content_repo, site_repo)
        inject_interlinks([article], article_urls, tiered_links, project, None, content_repo, link_repo)

        db_session.refresh(article)

        # Money site link (using "shaft machining" anchor)
        assert '<a href="https://moneysite.com">' in article.content

        # Homepage link (using "Home" anchor to /index.html)
        assert 'index.html">Home</a>' in article.content

    def test_large_batch(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Check that in a 20-article batch the first article links to the other 19."""
        articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"kw_{i}",
                title=f"Article {i}",
                outline={},
                content=f"<p>Article {i} about shaft machining processes.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(20)
        ]

        article_urls = generate_urls_for_batch(articles, site_repo)
        tiered_links = find_tiered_links(articles, None, project_repo, content_repo, site_repo)
        inject_interlinks(articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Only the first article is inspected.
        first_article = articles[0]
        db_session.refresh(first_article)

        assert "<h3>See Also</h3>" in first_article.content

        # One wheel_see_also row per other article in the batch: 20 - 1 = 19
        outbound_links = link_repo.get_by_source_article(first_article.id)
        see_also_links = [link for link in outbound_links if link.link_type == "wheel_see_also"]
        assert len(see_also_links) == 19
|
|
|
|
|
|
class TestLinkDatabaseRecords:
    """Test ArticleLink database records."""

    def test_all_link_types_recorded(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Check that a three-article T1 batch stores tiered and See Also links.

        After injection, ``get_by_link_type`` must return at least one
        ``tiered`` row per article and at least six ``wheel_see_also`` rows
        (each article links to the two others).
        """
        articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"kw_{i}",
                title=f"Article {i}",
                outline={},
                content=f"<p>Content {i} about shaft machining here.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(3)
        ]

        article_urls = generate_urls_for_batch(articles, site_repo)
        tiered_links = find_tiered_links(articles, None, project_repo, content_repo, site_repo)

        inject_interlinks(articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        all_tiered = link_repo.get_by_link_type("tiered")
        # The homepage query is run only to confirm it executes; no count is
        # asserted.
        # NOTE(review): these bodies contain no "Home" text, unlike the
        # homepage tests in this file - presumably a homepage link is not
        # guaranteed here; confirm before adding a count assertion.
        link_repo.get_by_link_type("homepage")
        all_see_also = link_repo.get_by_link_type("wheel_see_also")

        assert len(all_tiered) >= 3  # At least 1 per article
        assert len(all_see_also) >= 6  # Each article links to 2 others

    def test_internal_vs_external_links(
        self, db_session, project, site_deployment, content_repo, project_repo, site_repo, link_repo
    ):
        """Check internal (``to_content_id``) vs external (``to_url``) link targets.

        The money-site tiered link must be stored with ``to_url`` only, while
        See Also links must reference another article of the batch through
        ``to_content_id``.
        """
        t1_articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword=f"t1_{i}",
                title=f"T1 Article {i}",
                outline={},
                content=f"<p>T1 content {i} about shaft machining.</p>",
                word_count=30,
                status="generated",
                site_deployment_id=site_deployment.id,
            )
            for i in range(2)
        ]

        article_urls = generate_urls_for_batch(t1_articles, site_repo)
        tiered_links = find_tiered_links(t1_articles, None, project_repo, content_repo, site_repo)

        inject_interlinks(t1_articles, article_urls, tiered_links, project, None, content_repo, link_repo)

        # Only the first article's outbound links are inspected.
        outbound = link_repo.get_by_source_article(t1_articles[0].id)

        # Tiered link (to money site) should have to_url, not to_content_id
        tiered = [link for link in outbound if link.link_type == "tiered"]
        assert len(tiered) >= 1
        assert tiered[0].to_url is not None
        assert tiered[0].to_content_id is None

        # See Also links should have to_content_id
        see_also = [link for link in outbound if link.link_type == "wheel_see_also"]
        # Without this guard the loop below passes vacuously when no See Also
        # links were recorded at all.
        assert see_also, "expected at least one wheel_see_also link"

        # Build the id set once instead of rebuilding a list per link.
        batch_ids = {article.id for article in t1_articles}
        for link in see_also:
            assert link.to_content_id is not None
            assert link.to_content_id in batch_ids
|
|
|