"""
URL generation logic for generated content
"""

import logging
import re
from typing import Dict, List

from src.database.models import GeneratedContent
from src.database.repositories import SiteDeploymentRepository

logger = logging.getLogger(__name__)

# Compiled once at import time: generate_slug runs for every article in a batch.
_NON_SLUG_CHARS = re.compile(r'[^\w\s-]')
_SEPARATOR_RUNS = re.compile(r'[-\s]+')

# Placeholder returned by generate_slug when nothing usable remains of the title.
_DEFAULT_SLUG = "article"


def generate_slug(title: str, max_length: int = 100) -> str:
    """
    Generate URL-safe slug from article title

    The title is lower-cased, every character that is not a word character,
    whitespace or a hyphen is removed, and each run of whitespace/hyphens is
    collapsed into a single hyphen. Note that ``\\w`` keeps underscores and,
    for ``str`` patterns, non-ASCII letters and digits as well.

    Args:
        title: Article title (``None`` is treated like an empty title)
        max_length: Maximum slug length (default: 100)

    Returns:
        URL-safe slug with no leading or trailing hyphen, or "article" when
        nothing usable remains

    Examples:
        "How to Fix Your Engine" -> "how-to-fix-your-engine"
        "10 Best SEO Tips for 2024!" -> "10-best-seo-tips-for-2024"
        "C++ Programming Guide" -> "c-programming-guide"
    """
    slug = _NON_SLUG_CHARS.sub('', (title or "").lower())
    slug = _SEPARATOR_RUNS.sub('-', slug)
    # Strip again after truncating: the cut can land directly after a
    # separator, which would otherwise leave a dangling "-" at the end.
    slug = slug.strip('-')[:max_length].rstrip('-')
    return slug or _DEFAULT_SLUG


def generate_urls_for_batch(
    content_records: List[GeneratedContent],
    site_repo: SiteDeploymentRepository
) -> List[Dict]:
    """
    Generate final public URLs for a batch of articles

    Each URL has the form ``https://<hostname>/<slug>.html`` where the
    hostname is the site's custom hostname if set, otherwise its pull zone
    hostname.

    Args:
        content_records: List of GeneratedContent records (all should have
            site_deployment_id set)
        site_repo: SiteDeploymentRepository for looking up site details

    Returns:
        List of URL mappings, in input order:
        [{content_id, title, url, tier, slug, hostname}, ...]

    Raises:
        ValueError: If any article is missing site_deployment_id, the site
            lookup fails, or the site has no hostname to build a URL from
    """
    url_mappings = []

    for content in content_records:
        if not content.site_deployment_id:
            raise ValueError(
                f"Content ID {content.id} is missing site_deployment_id. "
                "All articles must be assigned to a site before URL generation."
            )

        site = site_repo.get_by_id(content.site_deployment_id)
        if not site:
            raise ValueError(
                f"Site deployment ID {content.site_deployment_id} not found for content ID {content.id}"
            )

        hostname = site.custom_hostname or site.pull_zone_bcdn_hostname
        if not hostname:
            # Without this guard the URL would silently become "https://None/...".
            raise ValueError(
                f"Site deployment ID {content.site_deployment_id} has no hostname "
                f"configured (content ID {content.id})"
            )

        slug = generate_slug(content.title)
        if slug == _DEFAULT_SLUG:
            # generate_slug never returns an empty string; the bare placeholder
            # means the title produced nothing usable, so make it unique per
            # article by appending the content ID.
            slug = f"{_DEFAULT_SLUG}-{content.id}"
            logger.warning(
                "Empty slug generated for content ID %s, using fallback: %s",
                content.id,
                slug,
            )

        url = f"https://{hostname}/{slug}.html"

        url_mappings.append({
            "content_id": content.id,
            "title": content.title,
            "url": url,
            "tier": content.tier,
            "slug": slug,
            "hostname": hostname,
        })

        logger.info("Generated URL for content_id=%s: %s", content.id, url)

    return url_mappings