Big-Link-Man/scripts/backfill_site_pages.py

159 lines
5.0 KiB
Python

#!/usr/bin/env python3
"""
Backfill script to generate boilerplate pages for existing sites
"""
import sys
import argparse
import logging
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.database.connection import DatabaseConnection
from src.database.repositories import SiteDeploymentRepository, SitePageRepository
from src.templating.service import TemplateService
from src.generation.site_page_generator import generate_site_pages
from src.auth.auth_service import AuthService
# Configure the root logger: INFO level, "timestamp - level - message" records.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-scoped logger used by the functions in this script.
logger = logging.getLogger(__name__)
# Page types every site is expected to have; a site lacking any of them is backfilled.
_REQUIRED_PAGE_TYPES = frozenset({"about", "contact", "privacy"})


def _site_domain(site):
    """Return the hostname used to identify *site* in log output."""
    return site.custom_hostname or site.pull_zone_bcdn_hostname


def _find_sites_missing_pages(sites, page_repo):
    """Return the sites from *sites* that lack at least one required page type."""
    pending = []
    for site in sites:
        existing_types = {page.page_type for page in page_repo.get_by_site(site.id)}
        if _REQUIRED_PAGE_TYPES - existing_types:
            pending.append(site)
    return pending


def _generate_for_sites(sites, template, page_repo, template_service, batch_size):
    """Generate pages for each site in *sites*; return ``(successful, failed)`` counts."""
    total = len(sites)
    successful = 0
    failed = 0
    for idx, site in enumerate(sites, 1):
        try:
            logger.info(
                f"[{idx}/{total}] Generating pages for site {site.id} ({_site_domain(site)})"
            )
            # NOTE(review): this is also called for sites that already have some
            # of the required pages; whether generate_site_pages skips existing
            # pages is not visible from this script - confirm.
            generate_site_pages(site, template, page_repo, template_service)
            successful += 1
        except Exception as e:
            # One bad site must not abort the whole backfill: record it with
            # its traceback and carry on with the next site.
            # NOTE(review): if the session needs a rollback after a failed
            # write before it can be reused, that should happen here - confirm.
            logger.exception(f"Failed to generate pages for site {site.id}: {e}")
            failed += 1
        if idx % batch_size == 0:
            logger.info(f"Progress: {idx}/{total} sites processed")
    return successful, failed


def backfill_site_pages(
    username: str,
    password: str,
    template: str = "basic",
    dry_run: bool = False,
    batch_size: int = 100
):
    """
    Generate boilerplate pages for all sites that don't have them

    A site is selected when it lacks at least one of the required page types
    ("about", "contact", "privacy"). The same selection is used for the dry
    run and for the real run, so the preview matches what would be generated.

    Args:
        username: Admin username for authentication
        password: Admin password
        template: Template to use (default: basic)
        dry_run: If True, only preview changes without applying
        batch_size: Number of sites to process between progress updates;
            must be at least 1

    Raises:
        ValueError: If batch_size is smaller than 1.
        SystemExit: With status 1 when authentication fails or the user is
            not an admin.
        Exception: Any error raised outside the per-site generation step is
            logged and re-raised.
    """
    # Fail fast, before touching the database: a zero batch size would
    # otherwise raise ZeroDivisionError midway through the run.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    db = DatabaseConnection()
    session = db.get_session()
    try:
        # Authentication happens inside the try block so the session is closed
        # on every exit path. sys.exit raises SystemExit, which is not an
        # Exception subclass, so it bypasses the except clause below but still
        # runs the finally clause.
        auth_service = AuthService(session)
        user = auth_service.authenticate(username, password)
        if not user or not user.is_admin():
            logger.error("Authentication failed or insufficient permissions")
            sys.exit(1)
        logger.info("Authenticated as admin user")

        site_repo = SiteDeploymentRepository(session)
        page_repo = SitePageRepository(session)
        template_service = TemplateService()

        all_sites = site_repo.get_all()
        logger.info(f"Found {len(all_sites)} total sites in database")

        sites_needing_pages = _find_sites_missing_pages(all_sites, page_repo)
        logger.info(f"Found {len(sites_needing_pages)} sites without boilerplate pages")

        if dry_run:
            logger.info("[DRY RUN] Preview of changes:")
            for site in sites_needing_pages:
                logger.info(
                    f"  [DRY RUN] Would generate pages for site {site.id} ({_site_domain(site)})"
                )
            logger.info(f"[DRY RUN] Total: {len(sites_needing_pages)} sites would be updated")
            return

        successful, failed = _generate_for_sites(
            sites_needing_pages, template, page_repo, template_service, batch_size
        )
        logger.info(f"Complete: {successful} successful, {failed} failed")
    except Exception as e:
        logger.exception(f"Backfill failed: {e}")
        raise
    finally:
        session.close()
def _positive_int(value):
    """argparse ``type`` callable: parse *value* as an integer that is at least 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {number}")
    return number


def main(argv=None):
    """
    Parse command-line arguments and run the site page backfill

    Args:
        argv: Argument list to parse; when None, argparse reads sys.argv[1:].

    Raises:
        SystemExit: With status 2 when the arguments are invalid (raised by
            argparse).
    """
    parser = argparse.ArgumentParser(
        description="Backfill boilerplate pages for existing sites"
    )
    parser.add_argument(
        "--username",
        required=True,
        help="Admin username for authentication"
    )
    # Optional on purpose: a password passed on the command line ends up in
    # shell history and process listings, so it is prompted for when omitted.
    parser.add_argument(
        "--password",
        default=None,
        help="Admin password (prompted for interactively when omitted)"
    )
    parser.add_argument(
        "--template",
        default="basic",
        choices=["basic", "modern", "classic", "minimal"],
        help="Template to use for pages (default: basic)"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview changes without applying them"
    )
    # Rejected at parse time: the progress check computes idx % batch_size,
    # so a value of 0 would raise ZeroDivisionError partway through the run.
    parser.add_argument(
        "--batch-size",
        type=_positive_int,
        default=100,
        help="Number of sites to process between progress updates (default: 100)"
    )
    args = parser.parse_args(argv)

    password = args.password
    if password is None:
        # Imported here because it is only needed on the interactive path.
        import getpass
        password = getpass.getpass("Admin password: ")

    backfill_site_pages(
        username=args.username,
        password=password,
        template=args.template,
        dry_run=args.dry_run,
        batch_size=args.batch_size
    )


if __name__ == "__main__":
    main()