94 lines
2.7 KiB
Python
94 lines
2.7 KiB
Python
"""
|
|
URL generation logic for generated content
|
|
"""
|
|
|
|
import re
|
|
import logging
|
|
from typing import List, Dict
|
|
from src.database.models import GeneratedContent
|
|
from src.database.repositories import SiteDeploymentRepository
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def generate_slug(title: str, max_length: int = 100) -> str:
    """
    Generate URL-safe slug from article title.

    The title is lowercased, every character that is not a word character,
    whitespace or hyphen is removed, and each run of whitespace/hyphens is
    collapsed into a single hyphen. The result is cut to ``max_length``
    characters and never starts or ends with a hyphen.

    Note: ``\\w`` on a ``str`` pattern is Unicode-aware, so underscores and
    non-ASCII letters/digits are kept as-is rather than removed.

    Args:
        title: Article title
        max_length: Maximum slug length (default: 100)

    Returns:
        URL-safe slug, or the literal "article" when nothing usable is left
        (the fallback is returned regardless of ``max_length``)

    Examples:
        "How to Fix Your Engine" -> "how-to-fix-your-engine"
        "10 Best SEO Tips for 2024!" -> "10-best-seo-tips-for-2024"
        "C++ Programming Guide" -> "c-programming-guide"
    """
    slug = title.lower()
    # Drop punctuation/symbols; keep word characters, whitespace and hyphens.
    slug = re.sub(r'[^\w\s-]', '', slug)
    # Collapse any mix of whitespace and hyphens into one separator.
    slug = re.sub(r'[-\s]+', '-', slug)
    # Strip first so a leading hyphen does not consume the length budget, then
    # strip again after the cut: truncation can land right after a word and
    # would otherwise leave a dangling "-" at the end of the slug.
    slug = slug.strip('-')[:max_length].rstrip('-')

    return slug or "article"
|
|
|
|
|
|
def generate_urls_for_batch(
    content_records: List[GeneratedContent],
    site_repo: SiteDeploymentRepository
) -> List[Dict]:
    """
    Generate final public URLs for a batch of articles.

    Each article's URL has the form ``https://<hostname>/<slug>.html``, where
    the hostname is the site's custom hostname if set, otherwise its pull zone
    hostname, and the slug is derived from the article title.

    Args:
        content_records: List of GeneratedContent records (all should have site_deployment_id set)
        site_repo: SiteDeploymentRepository for looking up site details

    Returns:
        List of URL mappings, one per record in input order:
        [{content_id, title, url, tier, slug, hostname}, ...]

    Raises:
        ValueError: If any article is missing site_deployment_id, its site
            lookup fails, or the site has no hostname to build a URL from
    """
    url_mappings = []
    # Sites already fetched in this call, keyed by deployment ID, so each
    # distinct site is looked up only once per batch.
    sites_by_id = {}

    for content in content_records:
        site_id = content.site_deployment_id
        if not site_id:
            raise ValueError(
                f"Content ID {content.id} is missing site_deployment_id. "
                "All articles must be assigned to a site before URL generation."
            )

        site = sites_by_id.get(site_id)
        if site is None:
            site = site_repo.get_by_id(site_id)
            if not site:
                raise ValueError(
                    f"Site deployment ID {content.site_deployment_id} not found for content ID {content.id}"
                )
            sites_by_id[site_id] = site

        hostname = site.custom_hostname or site.pull_zone_bcdn_hostname
        if not hostname:
            # Without this guard the URL would be built as "https://None/...".
            raise ValueError(
                f"Site deployment ID {site_id} has no hostname configured "
                f"for content ID {content.id}"
            )

        # A missing title is treated like an empty one and takes the
        # ID-based fallback below instead of crashing inside generate_slug.
        slug = generate_slug(content.title or "")

        # "article" is generate_slug's placeholder for an unusable title;
        # append the content ID so such articles do not all share one URL.
        if not slug or slug == "article":
            slug = f"article-{content.id}"
            logger.warning(
                "Empty slug generated for content ID %s, using fallback: %s",
                content.id,
                slug,
            )

        url = f"https://{hostname}/{slug}.html"

        url_mappings.append({
            "content_id": content.id,
            "title": content.title,
            "url": url,
            "tier": content.tier,
            "slug": slug,
            "hostname": hostname
        })

        logger.info("Generated URL for content_id=%s: %s", content.id, url)

    return url_mappings
|
|
|