""" Post-process existing articles that were generated but not fully processed. This script applies post-processing steps to articles that are already in the database: - Site assignment (if needed) - URL generation - Tiered link discovery - Interlink injection - Template application Usage: uv run python scripts/post_process_batch.py --project-id 1 --tier tier1 tier2 uv run python scripts/post_process_batch.py --project-id 1 --all-tiers """ import sys from pathlib import Path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) import click import os from src.database.session import db_manager from src.database.repositories import ( GeneratedContentRepository, ProjectRepository, SiteDeploymentRepository, ArticleLinkRepository, SitePageRepository ) from src.generation.ai_client import AIClient, PromptManager from src.generation.service import ContentGenerator from src.generation.url_generator import generate_urls_for_batch from src.generation.site_assignment import assign_sites_to_batch from src.generation.job_config import Job, InterlinkingConfig from src.interlinking.tiered_links import find_tiered_links from src.interlinking.content_injection import inject_interlinks @click.command() @click.option('--project-id', '-p', required=True, type=int, help='Project ID to post-process') @click.option('--tier', '-t', multiple=True, help='Tiers to process (e.g., tier1, tier2)') @click.option('--all-tiers', is_flag=True, help='Process all tiers') @click.option('--skip-site-assignment', is_flag=True, help='Skip site assignment step') @click.option('--skip-urls', is_flag=True, help='Skip URL generation') @click.option('--skip-interlinks', is_flag=True, help='Skip interlinking') @click.option('--skip-templates', is_flag=True, help='Skip template application') def post_process_batch( project_id: int, tier: tuple, all_tiers: bool, skip_site_assignment: bool, skip_urls: bool, skip_interlinks: bool, skip_templates: bool ): """Post-process existing articles in the database""" if not tier and not all_tiers: click.echo("Error: Must specify either --tier or --all-tiers", err=True) return session = db_manager.get_session() try: project_repo = ProjectRepository(session) content_repo = GeneratedContentRepository(session) site_repo = SiteDeploymentRepository(session) link_repo = ArticleLinkRepository(session) project = project_repo.get_by_id(project_id) if not project: click.echo(f"Error: Project {project_id} not found", err=True) return if not project.money_site_url: click.echo(f"Error: Project {project_id} has no money_site_url set", err=True) click.echo("Please set money_site_url before post-processing", err=True) return click.echo(f"\nPost-processing project: {project.name} (ID: {project_id})") click.echo(f"Keyword: {project.main_keyword}") click.echo(f"Money site: {project.money_site_url}\n") tiers_to_process = [] if all_tiers: all_articles = content_repo.get_by_project_id(project_id) tiers_to_process = sorted(set(a.tier for a in all_articles)) click.echo(f"Found tiers: {', '.join(tiers_to_process)}\n") else: tiers_to_process = list(tier) api_key = os.getenv("OPENROUTER_API_KEY") if not api_key: click.echo("Error: OPENROUTER_API_KEY not found in environment", err=True) return ai_client = AIClient(api_key=api_key, model='gpt-4o-mini') prompt_manager = PromptManager() content_generator = ContentGenerator( ai_client=ai_client, prompt_manager=prompt_manager, project_repo=project_repo, content_repo=content_repo, site_deployment_repo=site_repo ) job = Job( project_id=project_id, tiers={}, interlinking=InterlinkingConfig( links_per_article_min=2, links_per_article_max=4, include_home_link=True ) ) for tier_name in tiers_to_process: click.echo(f"Processing {tier_name}...") all_articles = content_repo.get_by_project_and_tier( project_id, tier_name, require_site=False ) if not all_articles: click.echo(f" No articles found for {tier_name}") continue click.echo(f" Found {len(all_articles)} articles") articles_without_sites = [a for a in all_articles if not a.site_deployment_id] if articles_without_sites and not skip_site_assignment: click.echo(f" Assigning sites to {len(articles_without_sites)} articles...") try: assign_sites_to_batch( content_records=all_articles, job=job, site_repo=site_repo, bunny_client=None, project_keyword=project.main_keyword ) session.expire_all() all_articles = content_repo.get_by_project_and_tier( project_id, tier_name, require_site=False ) click.echo(f" Assigned sites successfully") except Exception as e: click.echo(f" Warning: Site assignment failed: {e}") content_records = [a for a in all_articles if a.site_deployment_id] if not content_records: click.echo(f" No articles with site assignments, skipping {tier_name}") continue click.echo(f" Processing {len(content_records)} articles with sites...") if not skip_urls: click.echo(f" Generating URLs...") article_urls = generate_urls_for_batch(content_records, site_repo) click.echo(f" Generated {len(article_urls)} URLs") else: article_urls = {a.id: a.url for a in content_records if a.url} if not skip_interlinks: click.echo(f" Finding tiered links...") tiered_links = find_tiered_links( content_records, job, project_repo, content_repo, site_repo ) click.echo(f" Found tiered links for tier {tiered_links.get('tier', 'N/A')}") click.echo(f" Injecting interlinks...") inject_interlinks( content_records, article_urls, tiered_links, project, job, content_repo, link_repo ) click.echo(f" Interlinks injected successfully") if not skip_templates: click.echo(f" Applying templates...") template_count = 0 for content in content_records: try: if content_generator.apply_template(content.id): template_count += 1 except Exception as e: click.echo(f" Warning: Failed to apply template to content {content.id}: {e}") click.echo(f" Applied templates to {template_count}/{len(content_records)} articles") click.echo(f" {tier_name}: Complete\n") click.echo("\n" + "=" * 70) click.echo("Post-processing complete!") click.echo("=" * 70) click.echo(f"\nYou can now deploy with:") click.echo(f" uv run python main.py deploy-batch --batch-id {project_id}") finally: session.close() if __name__ == "__main__": post_process_batch()