diff --git a/src/generation/batch_processor.py b/src/generation/batch_processor.py index 84cee97..ad146ee 100644 --- a/src/generation/batch_processor.py +++ b/src/generation/batch_processor.py @@ -386,7 +386,13 @@ class BatchProcessor: ) if assigned_site: site_deployment_id = assigned_site.id - hostname = assigned_site.custom_hostname or assigned_site.pull_zone_bcdn_hostname + # For S3 sites, prefer s3_custom_domain over pull_zone_bcdn_hostname + if assigned_site.storage_provider in ('s3', 's3_compatible') and assigned_site.s3_custom_domain: + hostname = assigned_site.s3_custom_domain + elif assigned_site.storage_provider in ('s3', 's3_compatible') and assigned_site.s3_bucket_name and assigned_site.s3_bucket_region: + hostname = f"{assigned_site.s3_bucket_name}.s3.{assigned_site.s3_bucket_region}.amazonaws.com" + else: + hostname = assigned_site.custom_hostname or assigned_site.pull_zone_bcdn_hostname click.echo(f"{prefix} Assigned to site: {hostname} (ID: {site_deployment_id})") # Update the article with the assigned site saved_content.site_deployment_id = site_deployment_id @@ -883,7 +889,13 @@ class BatchProcessor: ) if assigned_site: site_deployment_id = assigned_site.id - hostname = assigned_site.custom_hostname or assigned_site.pull_zone_bcdn_hostname + # For S3 sites, prefer s3_custom_domain over pull_zone_bcdn_hostname + if assigned_site.storage_provider in ('s3', 's3_compatible') and assigned_site.s3_custom_domain: + hostname = assigned_site.s3_custom_domain + elif assigned_site.storage_provider in ('s3', 's3_compatible') and assigned_site.s3_bucket_name and assigned_site.s3_bucket_region: + hostname = f"{assigned_site.s3_bucket_name}.s3-website-{assigned_site.s3_bucket_region}.amazonaws.com" + else: + hostname = assigned_site.custom_hostname or assigned_site.pull_zone_bcdn_hostname click.echo(f"{prefix} Assigned to site: {hostname} (ID: {site_deployment_id})") # Update the article with the assigned site saved_content.site_deployment_id = 
def get_site_hostname(site: SiteDeployment) -> str:
    """
    Get the hostname for a site, handling S3-backed sites separately.

    For S3 sites (storage_provider is 's3' or 's3_compatible'):
    - Uses s3_custom_domain if set
    - Otherwise builds the virtual-hosted-style bucket hostname
      '<bucket>.s3.<region>.amazonaws.com' when both s3_bucket_name and
      s3_bucket_region are set (this is the REST endpoint form, not the
      's3-website' static-hosting endpoint)
    - Only when neither is available, falls back to custom_hostname /
      pull_zone_bcdn_hostname and logs a warning

    For all other providers:
    - Uses custom_hostname if set
    - Falls back to pull_zone_bcdn_hostname

    Args:
        site: SiteDeployment record

    Returns:
        Hostname string for the site
    """
    # Non-S3 providers keep the original custom-hostname-first behaviour.
    if site.storage_provider not in ('s3', 's3_compatible'):
        return site.custom_hostname or site.pull_zone_bcdn_hostname

    if site.s3_custom_domain:
        return site.s3_custom_domain

    # NOTE(review): this amazonaws.com hostname is also produced for
    # 's3_compatible' sites; presumably those should always carry an
    # s3_custom_domain -- confirm against how such sites are provisioned.
    if site.s3_bucket_name and site.s3_bucket_region:
        return f"{site.s3_bucket_name}.s3.{site.s3_bucket_region}.amazonaws.com"

    # Last resort: the S3 record is incomplete, so reuse the generic
    # hostname fields and leave a trace for whoever has to fix the record.
    # NOTE(review): the result is None if both fallback fields are unset.
    hostname = site.custom_hostname or site.pull_zone_bcdn_hostname
    logger.warning(
        "S3 site %s missing s3_custom_domain and bucket info, using fallback: %s",
        site.id,
        hostname,
    )
    return hostname