# Big-Link-Man/tests/integration/test_story_3_1_integration.py
#
# 337 lines
# 11 KiB
# Python

"""
Integration tests for Story 3.1: URL Generation and Site Assignment
"""
from unittest.mock import Mock, patch

import pytest

from src.database.models import GeneratedContent, SiteDeployment, Project
from src.database.repositories import SiteDeploymentRepository, GeneratedContentRepository
from src.database.repositories import ProjectRepository
from src.generation.job_config import Job
from src.generation.site_assignment import assign_sites_to_batch
from src.generation.url_generator import generate_urls_for_batch
from src.generation.site_provisioning import provision_keyword_sites, create_generic_sites
from src.deployment.bunnynet import StorageZoneResult, PullZoneResult
@pytest.fixture
def mock_bunny_client():
    """Provide a mocked bunny.net client with working zone-creation stubs.

    ``create_storage_zone(name, region)`` returns a ``StorageZoneResult``
    whose id counts up from 101, and ``create_pull_zone(name,
    storage_zone_id)`` returns a ``PullZoneResult`` whose id counts up from
    201 and whose hostname is ``"<name>.b-cdn.net"``. Both stubs are wrapped
    in ``Mock`` objects so tests can inspect ``call_count``.
    """
    last_storage_id = 100
    last_pull_id = 200

    def _new_storage_zone(name, region):
        # Each call hands out the next sequential storage zone id.
        nonlocal last_storage_id
        last_storage_id += 1
        return StorageZoneResult(
            id=last_storage_id,
            name=name,
            password="test_password",
            region=region,
        )

    def _new_pull_zone(name, storage_zone_id):
        # storage_zone_id is accepted to match the call signature but unused.
        nonlocal last_pull_id
        last_pull_id += 1
        return PullZoneResult(
            id=last_pull_id,
            name=name,
            hostname=f"{name}.b-cdn.net",
        )

    fake_client = Mock()
    fake_client.create_storage_zone = Mock(side_effect=_new_storage_zone)
    fake_client.create_pull_zone = Mock(side_effect=_new_pull_zone)
    return fake_client
class TestFullWorkflow:
    """Integration tests for complete Story 3.1 workflow.

    Every test builds its own sites, project and content through the
    repositories on top of the ``db_session`` fixture, then drives site
    assignment, site provisioning and/or URL generation end to end.
    """

    def test_full_flow_with_existing_sites(self, db_session):
        """Test assignment and URL generation with existing sites."""
        site_repo = SiteDeploymentRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)

        # Two sites: one with a custom hostname, one with only a b-cdn hostname.
        site_repo.create(
            site_name="site1",
            storage_zone_id=1,
            storage_zone_name="site1",
            storage_zone_password="pass1",
            storage_zone_region="DE",
            pull_zone_id=10,
            pull_zone_bcdn_hostname="site1.b-cdn.net",
            custom_hostname="www.custom1.com",
        )
        site_repo.create(
            site_name="site2",
            storage_zone_id=2,
            storage_zone_name="site2",
            storage_zone_password="pass2",
            storage_zone_region="DE",
            pull_zone_id=20,
            pull_zone_bcdn_hostname="site2.b-cdn.net",
            custom_hostname=None,
        )

        # The content records need a project to belong to.
        project = ProjectRepository(db_session).create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test keyword"},
        )

        content1 = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="engine",
            title="How to Fix Your Engine",
            outline={"sections": []},
            content="<p>Test content</p>",
            word_count=100,
            status="generated",
        )
        content2 = content_repo.create(
            project_id=project.id,
            tier="tier2",
            keyword="car",
            title="Car Maintenance Guide",
            outline={"sections": []},
            content="<p>Test content 2</p>",
            word_count=150,
            status="generated",
        )

        job = Job(
            project_id=project.id,
            tiers={},
            deployment_targets=None,
            tier1_preferred_sites=None,
            auto_create_sites=False,
            create_sites_for_keywords=None,
        )
        bunny_client = Mock()

        assign_sites_to_batch(
            [content1, content2],
            job,
            site_repo,
            bunny_client,
            "test-project",
        )

        # Both articles must be assigned, and to different sites.
        db_session.refresh(content1)
        db_session.refresh(content2)
        assert content1.site_deployment_id is not None
        assert content2.site_deployment_id is not None
        assert content1.site_deployment_id != content2.site_deployment_id

        urls = generate_urls_for_batch([content1, content2], site_repo)
        assert len(urls) == 2
        assert all(entry["url"].startswith("https://") for entry in urls)
        assert all(entry["url"].endswith(".html") for entry in urls)

        # One URL must use the custom hostname and the other the b-cdn
        # hostname. Comparing sets checks both are present; an `or` between
        # two membership tests would pass with only one of them.
        hostnames = {entry["hostname"] for entry in urls}
        assert hostnames == {"www.custom1.com", "site2.b-cdn.net"}

    def test_tier1_preferred_sites_priority(self, db_session):
        """Test that tier1 articles get preferred sites first."""
        site_repo = SiteDeploymentRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)

        preferred = site_repo.create(
            site_name="preferred",
            storage_zone_id=1,
            storage_zone_name="preferred",
            storage_zone_password="pass",
            storage_zone_region="DE",
            pull_zone_id=10,
            pull_zone_bcdn_hostname="preferred.b-cdn.net",
            custom_hostname="www.preferred.com",
        )
        # A second, non-preferred site so the assignment has a real choice.
        site_repo.create(
            site_name="regular",
            storage_zone_id=2,
            storage_zone_name="regular",
            storage_zone_password="pass",
            storage_zone_region="DE",
            pull_zone_id=20,
            pull_zone_bcdn_hostname="regular.b-cdn.net",
            custom_hostname=None,
        )

        project = ProjectRepository(db_session).create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test"},
        )

        content1 = content_repo.create(
            project_id=project.id,
            tier="tier1",
            keyword="test",
            title="Tier 1 Article",
            outline={},
            content="<p>Test</p>",
            word_count=100,
            status="generated",
        )

        job = Job(
            project_id=project.id,
            tiers={},
            tier1_preferred_sites=["www.preferred.com"],
            auto_create_sites=False,
        )
        bunny_client = Mock()

        assign_sites_to_batch([content1], job, site_repo, bunny_client, "test")
        db_session.refresh(content1)

        # The tier1 article should land on the preferred site.
        assert content1.site_deployment_id == preferred.id

    def test_auto_create_when_insufficient_sites(self, db_session, mock_bunny_client):
        """Test auto-creation of sites when pool is insufficient."""
        site_repo = SiteDeploymentRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)

        project = ProjectRepository(db_session).create(
            user_id=1,
            name="Test Project",
            data={"main_keyword": "test keyword"},
        )

        # Three articles and no pre-existing sites.
        contents = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword="test",
                title=f"Article {i}",
                outline={},
                content="<p>Test</p>",
                word_count=100,
                status="generated",
            )
            for i in range(3)
        ]

        job = Job(
            project_id=project.id,
            tiers={},
            auto_create_sites=True,
        )

        assign_sites_to_batch(contents, job, site_repo, mock_bunny_client, "test-project")

        # One storage zone and one pull zone should be created per article.
        assert mock_bunny_client.create_storage_zone.call_count == 3
        assert mock_bunny_client.create_pull_zone.call_count == 3

        # Every article ends up with a site.
        for content in contents:
            db_session.refresh(content)
            assert content.site_deployment_id is not None

    def test_keyword_site_provisioning(self, db_session, mock_bunny_client):
        """Test pre-creation of keyword sites."""
        site_repo = SiteDeploymentRepository(db_session)

        keywords = [
            {"keyword": "engine repair", "count": 2},
            {"keyword": "car maintenance", "count": 1},
        ]

        sites = provision_keyword_sites(keywords, mock_bunny_client, site_repo)

        # 2 + 1 requested sites, all without a custom hostname.
        assert len(sites) == 3
        assert all(site.custom_hostname is None for site in sites)
        assert all(site.pull_zone_bcdn_hostname.endswith(".b-cdn.net") for site in sites)

        # Site names should contain the hyphenated keyword.
        site_names = [site.site_name for site in sites]
        engine_sites = [name for name in site_names if "engine-repair" in name]
        car_sites = [name for name in site_names if "car-maintenance" in name]
        assert len(engine_sites) == 2
        assert len(car_sites) == 1

    def test_url_generation_with_various_titles(self, db_session):
        """Test URL generation with different title formats."""
        site_repo = SiteDeploymentRepository(db_session)
        content_repo = GeneratedContentRepository(db_session)

        site = site_repo.create(
            site_name="test",
            storage_zone_id=1,
            storage_zone_name="test",
            storage_zone_password="pass",
            storage_zone_region="DE",
            pull_zone_id=10,
            pull_zone_bcdn_hostname="test.b-cdn.net",
            custom_hostname=None,
        )

        project = ProjectRepository(db_session).create(
            user_id=1,
            name="Test",
            data={"main_keyword": "test"},
        )

        # (title, expected slug) pairs.
        test_cases = [
            ("How to Fix Your Engine", "how-to-fix-your-engine"),
            ("10 Best SEO Tips for 2024!", "10-best-seo-tips-for-2024"),
            ("C++ Programming", "c-programming"),
            ("!!!Special!!!", "special"),
        ]

        # All articles are pre-assigned to the single site created above.
        articles = [
            content_repo.create(
                project_id=project.id,
                tier="tier1",
                keyword="test",
                title=title,
                outline={},
                content="<p>Test</p>",
                word_count=100,
                status="generated",
                site_deployment_id=site.id,
            )
            for title, _ in test_cases
        ]

        urls = generate_urls_for_batch(articles, site_repo)

        # Check the length first: zip() would silently ignore a missing or
        # extra URL entry.
        assert len(urls) == len(test_cases)
        for entry, (_, expected_slug) in zip(urls, test_cases):
            assert entry["slug"] == expected_slug
            assert entry["url"] == f"https://test.b-cdn.net/{expected_slug}.html"