""" CLI command definitions using Click """ import random import click from typing import Optional, List from src.core.config import get_config, get_bunny_account_api_key, get_concurrent_workers from src.auth.service import AuthService from src.database.session import db_manager from src.database.repositories import UserRepository, SiteDeploymentRepository, ProjectRepository from src.database.models import User from src.interlinking.anchor_text_generator import AnchorTextGenerator from src.deployment.bunnynet import ( BunnyNetClient, BunnyNetAPIError, BunnyNetAuthError, BunnyNetResourceConflictError ) from src.ingestion.parser import CORAParser, CORAParseError, SimpleSpreadsheetParser from src.generation.ai_client import AIClient, PromptManager from src.generation.service import ContentGenerator from src.generation.batch_processor import BatchProcessor from src.database.repositories import GeneratedContentRepository, SitePageRepository from src.deployment.bunny_storage import BunnyStorageError from src.deployment.deployment_service import DeploymentService from src.deployment.url_logger import URLLogger from src.templating.service import TemplateService from dotenv import load_dotenv import os import requests import random import json from pathlib import Path from datetime import datetime # Load .env file at module level load_dotenv() def _get_brands_for_url(url: str) -> List[str]: """ Look up brand names for a given URL from brands.json Args: url: Money site URL (e.g., "https://www.gullco.com") Returns: List of brand names, or empty list if not found or file missing """ try: from urllib.parse import urlparse # Normalize URL: remove scheme, www., trailing slash parsed = urlparse(url) domain = parsed.netloc # Remove www. 
prefix if present if domain.startswith('www.'): domain = domain[4:] # Load brands.json from project root brands_file = Path("brands.json") if not brands_file.exists(): return [] with open(brands_file, 'r', encoding='utf-8') as f: brands_data = json.load(f) # Look up normalized domain return brands_data.get(domain, []) except Exception: return [] def create_job_file_for_project( project_id: int, project_name: str, session, tier1_branded_ratio: Optional[float] = None, tier1_branded_text: Optional[str] = None, tier1_branded_plus_ratio: Optional[float] = None, brand_names: Optional[List[str]] = None, random_deployment_targets: Optional[int] = None ) -> Optional[str]: """ Create a job JSON file for a newly created project. Args: project_id: The ID of the created project project_name: The name of the project (for filename) session: Database session tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0) tier1_branded_text: Optional branded anchor text (company name) for tier1 tier1_branded_plus_ratio: Optional ratio of branded+ anchor text for tier1 (0.0-1.0, applied to remaining slots after branded) brand_names: Optional list of brand names for branded+ generation random_deployment_targets: Optional number of random deployment targets to select (default: random 2-3) Returns: Path to created file, or None if creation failed """ try: deployment_repo = SiteDeploymentRepository(session) sites = deployment_repo.get_all() available_domains = [ site.custom_hostname for site in sites if site.custom_hostname is not None ] if not available_domains: click.echo("Warning: No domains with custom hostnames found. 
Job file not created.", err=True) return None t1_count = tier1_count if tier1_count is not None else random.randint(10, 12) t2_count = random.randint(30, 45) if random_deployment_targets is not None: num_targets = min(random_deployment_targets, len(available_domains)) else: num_targets = min(random.randint(2, 3), len(available_domains)) selected_domains = random.sample(available_domains, num_targets) sanitized_name = "".join(c if c.isalnum() or c in ('-', '_') else '-' for c in project_name.lower()).strip('-') sanitized_name = '-'.join(sanitized_name.split()) jobs_dir = Path("jobs") jobs_dir.mkdir(exist_ok=True) base_filename = f"{sanitized_name}.json" filepath = jobs_dir / base_filename if filepath.exists(): date_suffix = datetime.now().strftime("%y%m%d") base_filename = f"{sanitized_name}-{date_suffix}.json" filepath = jobs_dir / base_filename # Build tier1 configuration tier1_config = { "count": t1_count, "min_word_count": 1250, "max_word_count": 2000, "models": { "title": "openai/gpt-4o-mini", "outline": "openai/gpt-4o-mini", "content": "x-ai/grok-4-fast" } } # Add anchor_text_config if branded ratio/text or branded+ ratio is provided if (tier1_branded_ratio is not None and tier1_branded_text) or (tier1_branded_plus_ratio is not None and brand_names): # Get project to retrieve main_keyword for non-branded terms project_repo = ProjectRepository(session) project = project_repo.get_by_id(project_id) if project and project.main_keyword: # First, get the actual available anchor text terms # Use custom anchor text from CORA if available, otherwise generate keyword variations if project.custom_anchor_text and len(project.custom_anchor_text) > 0: keyword_variations = project.custom_anchor_text elif project.related_searches and len(project.related_searches) > 0: keyword_variations = project.related_searches else: anchor_generator = AnchorTextGenerator() keyword_variations = anchor_generator._generate_from_keyword(project, 10) # Use the ACTUAL count of available terms 
actual_count = len(keyword_variations) # Calculate branded and remaining counts based on actual available terms branded_count = 0 if tier1_branded_ratio is not None and tier1_branded_text: branded_count = int(actual_count * tier1_branded_ratio) remaining_count = actual_count - branded_count # Parse comma-separated branded anchor texts branded_texts = [] if tier1_branded_text: branded_texts = [text.strip() for text in tier1_branded_text.split(',') if text.strip()] # Create anchor text list starting with branded terms anchor_terms = [] for i in range(branded_count): branded_text = branded_texts[i % len(branded_texts)] # Cycle through branded texts anchor_terms.append(branded_text) # Generate branded+ terms if enabled branded_plus_count = 0 if tier1_branded_plus_ratio is not None and brand_names and len(brand_names) > 0: branded_plus_count = int(remaining_count * tier1_branded_plus_ratio) # Generate branded+ terms from brands + related_searches # Use related_searches from project, or fallback to keyword_variations related_searches = project.related_searches if project.related_searches else keyword_variations branded_plus_terms = [] for brand in brand_names: for term in related_searches: branded_plus_terms.append(f"{brand} {term}") branded_plus_terms.append(f"{term} by {brand}") # Randomly select the needed number of branded+ terms if len(branded_plus_terms) > 0: if branded_plus_count > len(branded_plus_terms): selected_branded_plus = branded_plus_terms else: selected_branded_plus = random.sample(branded_plus_terms, branded_plus_count) anchor_terms.extend(selected_branded_plus) # Calculate regular count from remaining slots regular_count = remaining_count - branded_plus_count # Add regular terms if regular_count > 0: # Randomize keyword selection if we're not using all available terms if regular_count < len(keyword_variations): selected_keywords = random.sample(keyword_variations, regular_count) else: selected_keywords = keyword_variations[:regular_count] 
anchor_terms.extend(selected_keywords) tier1_config["anchor_text_config"] = { "mode": "explicit", "terms": anchor_terms } job_template = { "jobs": [ { "project_id": project_id, "deployment_targets": selected_domains, "tiers": { "tier1": tier1_config, "tier2": { "count": t2_count, "min_word_count": 1000, "max_word_count": 1250, "models": { "title": "openai/gpt-4o-mini", "outline": "openai/gpt-4o-mini", "content": "openai/gpt-4o-mini" }, "interlinking": { "links_per_article_min": 3, "links_per_article_max": 6 } } } } ] } with open(filepath, 'w', encoding='utf-8') as f: json.dump(job_template, f, indent=2) return str(filepath) except Exception as e: click.echo(f"Warning: Failed to create job file: {e}", err=True) return None def authenticate_admin(username: str, password: str) -> Optional[User]: """ Authenticate a user and verify they have admin role Args: username: The username to authenticate password: The password to authenticate Returns: User object if authenticated and is admin, None otherwise """ session = db_manager.get_session() try: user_repo = UserRepository(session) auth_service = AuthService(user_repo) user = auth_service.authenticate_user(username, password) if user and user.is_admin(): return user return None finally: session.close() def prompt_admin_credentials() -> tuple[str, str]: """ Prompt for admin username and password Checks environment variables CLIENT_USERNAME and CLIENT_PASSWORD first Returns: Tuple of (username, password) """ username = os.getenv("CLIENT_USERNAME") password = os.getenv("CLIENT_PASSWORD") if username and password: return username, password click.echo("Admin authentication required") if not username: username = click.prompt("Username", type=str) if not password: password = click.prompt("Password", type=str, hide_input=True) return username, password @click.group() @click.version_option(version="1.0.0") def app(): """Content Automation & Syndication Platform CLI""" pass @app.command() def config(): """Show current 
configuration""" try: config = get_config() click.echo("Current Configuration:") click.echo(f"Application: {config.application.name} v{config.application.version}") click.echo(f"Environment: {config.application.environment}") click.echo(f"Database: {config.database.url}") click.echo(f"AI Model: {config.ai_service.model}") click.echo(f"Log Level: {config.logging.level}") except Exception as e: click.echo(f"Error loading configuration: {e}", err=True) @app.command() def health(): """Check system health""" try: config = get_config() click.echo("[OK] Configuration loaded successfully") click.echo("[OK] System is healthy") except Exception as e: click.echo(f"[ERROR] System health check failed: {e}", err=True) raise click.Abort() @app.command() def models(): """List available AI models""" try: config = get_config() click.echo("Available AI Models:") click.echo(f"Current: {config.ai_service.model}") click.echo(f"Provider: {config.ai_service.provider}") click.echo(f"Base URL: {config.ai_service.base_url}") click.echo("\nAvailable models:") for model_name, model_id in config.ai_service.available_models.items(): status = " (current)" if model_id == config.ai_service.model else "" click.echo(f" {model_name}: {model_id}{status}") except Exception as e: click.echo(f"Error listing models: {e}", err=True) @app.command("add-user") @click.option("--username", prompt=True, help="Username for the new user") @click.option("--password", prompt=True, hide_input=True, confirmation_prompt=True, help="Password for the new user") @click.option("--role", type=click.Choice(["Admin", "User"], case_sensitive=True), prompt=True, help="Role for the new user") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") def add_user(username: str, password: str, role: str, admin_user: Optional[str], admin_password: Optional[str]): """Create a new user (requires admin authentication)""" try: # Authenticate admin 
if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # Create the new user session = db_manager.get_session() try: user_repo = UserRepository(session) auth_service = AuthService(user_repo) new_user = auth_service.create_user_with_hashed_password( username=username, password=password, role=role ) click.echo(f"Success: User '{new_user.username}' created with role '{new_user.role}'") finally: session.close() except ValueError as e: click.echo(f"Error: {e}", err=True) raise click.Abort() except Exception as e: click.echo(f"Error creating user: {e}", err=True) raise click.Abort() @app.command("delete-user") @click.option("--username", prompt=True, help="Username to delete") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") @click.confirmation_option(prompt="Are you sure you want to delete this user?") def delete_user(username: str, admin_user: Optional[str], admin_password: Optional[str]): """Delete a user by username (requires admin authentication)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # Prevent admin from deleting themselves if admin.username == username: click.echo("Error: Cannot delete your own account", err=True) raise click.Abort() # Delete the user session = db_manager.get_session() try: user_repo = UserRepository(session) # Check if user exists user_to_delete = user_repo.get_by_username(username) if not user_to_delete: click.echo(f"Error: User '{username}' not found", err=True) raise click.Abort() # 
Delete the user success = user_repo.delete(user_to_delete.id) if success: click.echo(f"Success: User '{username}' has been deleted") else: click.echo(f"Error: Failed to delete user '{username}'", err=True) raise click.Abort() finally: session.close() except Exception as e: click.echo(f"Error deleting user: {e}", err=True) raise click.Abort() @app.command("list-users") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") def list_users(admin_user: Optional[str], admin_password: Optional[str]): """List all users (requires admin authentication)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # List all users session = db_manager.get_session() try: user_repo = UserRepository(session) users = user_repo.get_all() if not users: click.echo("No users found") return click.echo(f"\nTotal users: {len(users)}") click.echo("-" * 60) click.echo(f"{'ID':<5} {'Username':<20} {'Role':<10} {'Created'}") click.echo("-" * 60) for user in users: created = user.created_at.strftime("%Y-%m-%d %H:%M:%S") click.echo(f"{user.id:<5} {user.username:<20} {user.role:<10} {created}") click.echo("-" * 60) finally: session.close() except Exception as e: click.echo(f"Error listing users: {e}", err=True) raise click.Abort() @app.command("provision-site") @click.option("--name", prompt=True, help="Site name") @click.option("--domain", prompt=True, help="Custom domain (FQDN, e.g., www.example.com)") @click.option("--storage-name", prompt=True, help="Storage Zone name (must be globally unique)") @click.option("--region", prompt=True, type=click.Choice(["DE", "NY", "LA", "SG", "SYD"]), help="Storage region") @click.option("--admin-user", help="Admin username for 
authentication") @click.option("--admin-password", help="Admin password for authentication") def provision_site(name: str, domain: str, storage_name: str, region: str, admin_user: Optional[str], admin_password: Optional[str]): """Provision a new site with Storage Zone and Pull Zone (requires admin)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # Get bunny.net API key try: api_key = get_bunny_account_api_key() except ValueError as e: click.echo(f"Error: {e}", err=True) click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True) raise click.Abort() click.echo(f"\nProvisioning site '{name}' with domain '{domain}'...") # Initialize bunny.net client client = BunnyNetClient(api_key) session = db_manager.get_session() try: deployment_repo = SiteDeploymentRepository(session) # Check if domain already exists if deployment_repo.exists(domain): click.echo(f"Error: Site with domain '{domain}' already exists", err=True) raise click.Abort() # Step 1: Create Storage Zone click.echo(f"Step 1/3: Creating Storage Zone '{storage_name}' in region {region}...") storage_result = client.create_storage_zone(storage_name, region) click.echo(f" Storage Zone created: ID={storage_result.id}") # Step 2: Create Pull Zone pull_zone_name = f"{storage_name}-cdn" click.echo(f"Step 2/3: Creating Pull Zone '{pull_zone_name}'...") pull_result = client.create_pull_zone(pull_zone_name, storage_result.id) click.echo(f" Pull Zone created: ID={pull_result.id}, Hostname={pull_result.hostname}") # Step 3: Add Custom Hostname click.echo(f"Step 3/3: Adding custom hostname '{domain}'...") client.add_custom_hostname(pull_result.id, domain) click.echo(f" Custom hostname added successfully") # Save to database deployment = deployment_repo.create( 
site_name=name, custom_hostname=domain, storage_zone_id=storage_result.id, storage_zone_name=storage_result.name, storage_zone_password=storage_result.password, storage_zone_region=storage_result.region, pull_zone_id=pull_result.id, pull_zone_bcdn_hostname=pull_result.hostname ) # Randomly assign template template_service = TemplateService() available_templates = template_service.get_available_templates() if available_templates: deployment.template_name = random.choice(available_templates) session.commit() session.refresh(deployment) click.echo(f" Template assigned: {deployment.template_name}") click.echo("\n" + "=" * 70) click.echo("Site provisioned successfully!") click.echo("=" * 70) click.echo("\nMANUAL DNS CONFIGURATION REQUIRED:") click.echo("You must create the following CNAME record with your domain registrar:\n") click.echo(f" Type: CNAME") subdomain = domain.split('.')[0] if '.' in domain else '@' click.echo(f" Host: {subdomain}") click.echo(f" Value: {pull_result.hostname}") click.echo("\nExample DNS configuration:") click.echo(f" Type: CNAME") click.echo(f" Host: {subdomain}") click.echo(f" Value: {pull_result.hostname}") click.echo("\nNote: DNS propagation may take up to 48 hours.") click.echo("=" * 70) except BunnyNetAuthError as e: click.echo(f"Error: Authentication failed - {e}", err=True) click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True) raise click.Abort() except BunnyNetResourceConflictError as e: click.echo(f"Error: Resource conflict - {e}", err=True) click.echo("Storage Zone or Pull Zone name already exists. 
Try a different name.", err=True) raise click.Abort() except BunnyNetAPIError as e: click.echo(f"Error: bunny.net API error - {e}", err=True) raise click.Abort() finally: session.close() except Exception as e: click.echo(f"Error provisioning site: {e}", err=True) raise click.Abort() @app.command("attach-domain") @click.option("--name", prompt=True, help="Site name") @click.option("--domain", prompt=True, help="Custom domain (FQDN, e.g., www.example.com)") @click.option("--storage-name", prompt=True, help="Existing Storage Zone name") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") def attach_domain(name: str, domain: str, storage_name: str, admin_user: Optional[str], admin_password: Optional[str]): """Attach a domain to an existing Storage Zone (requires admin)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # Get bunny.net API key try: api_key = get_bunny_account_api_key() except ValueError as e: click.echo(f"Error: {e}", err=True) click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True) raise click.Abort() click.echo(f"\nAttaching domain '{domain}' to existing Storage Zone '{storage_name}'...") # Initialize bunny.net client client = BunnyNetClient(api_key) session = db_manager.get_session() try: deployment_repo = SiteDeploymentRepository(session) # Check if domain already exists if deployment_repo.exists(domain): click.echo(f"Error: Site with domain '{domain}' already exists", err=True) raise click.Abort() # Step 1: Find existing Storage Zone click.echo(f"Step 1/3: Finding Storage Zone '{storage_name}'...") storage_result = client.find_storage_zone_by_name(storage_name) if not storage_result: 
click.echo(f"Error: Storage Zone '{storage_name}' not found", err=True) raise click.Abort() click.echo(f" Storage Zone found: ID={storage_result.id}") # Step 2: Create Pull Zone pull_zone_name = f"{storage_name}-{domain.replace('.', '-')}" click.echo(f"Step 2/3: Creating Pull Zone '{pull_zone_name}'...") pull_result = client.create_pull_zone(pull_zone_name, storage_result.id) click.echo(f" Pull Zone created: ID={pull_result.id}, Hostname={pull_result.hostname}") # Step 3: Add Custom Hostname click.echo(f"Step 3/3: Adding custom hostname '{domain}'...") client.add_custom_hostname(pull_result.id, domain) click.echo(f" Custom hostname added successfully") # Save to database deployment = deployment_repo.create( site_name=name, custom_hostname=domain, storage_zone_id=storage_result.id, storage_zone_name=storage_result.name, storage_zone_password=storage_result.password, storage_zone_region=storage_result.region, pull_zone_id=pull_result.id, pull_zone_bcdn_hostname=pull_result.hostname ) # Randomly assign template template_service = TemplateService() available_templates = template_service.get_available_templates() if available_templates: deployment.template_name = random.choice(available_templates) session.commit() session.refresh(deployment) click.echo(f" Template assigned: {deployment.template_name}") click.echo("\n" + "=" * 70) click.echo("Domain attached successfully!") click.echo("=" * 70) click.echo("\nMANUAL DNS CONFIGURATION REQUIRED:") click.echo("You must create the following CNAME record with your domain registrar:\n") click.echo(f" Type: CNAME") subdomain = domain.split('.')[0] if '.' 
in domain else '@' click.echo(f" Host: {subdomain}") click.echo(f" Value: {pull_result.hostname}") click.echo("\nExample DNS configuration:") click.echo(f" Type: CNAME") click.echo(f" Host: {subdomain}") click.echo(f" Value: {pull_result.hostname}") click.echo("\nNote: DNS propagation may take up to 48 hours.") click.echo("=" * 70) except BunnyNetAuthError as e: click.echo(f"Error: Authentication failed - {e}", err=True) click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True) raise click.Abort() except BunnyNetResourceConflictError as e: click.echo(f"Error: Resource conflict - {e}", err=True) click.echo("Pull Zone name already exists. Try a different domain.", err=True) raise click.Abort() except BunnyNetAPIError as e: click.echo(f"Error: bunny.net API error - {e}", err=True) raise click.Abort() finally: session.close() except Exception as e: click.echo(f"Error attaching domain: {e}", err=True) raise click.Abort() @app.command("list-sites") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") def list_sites(admin_user: Optional[str], admin_password: Optional[str]): """List all site deployments (requires admin)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # List all sites session = db_manager.get_session() try: deployment_repo = SiteDeploymentRepository(session) sites = deployment_repo.get_all() if not sites: click.echo("No site deployments found") return click.echo(f"\nTotal sites: {len(sites)}") click.echo("-" * 100) click.echo(f"{'ID':<5} {'Site Name':<25} {'Custom Domain':<30} {'Storage Zone':<20} {'Region':<8}") click.echo("-" * 100) for site in sites: click.echo(f"{site.id:<5} {site.site_name:<25} 
{site.custom_hostname or 'N/A':<30} " f"{site.storage_zone_name:<20} {site.storage_zone_region:<8}") click.echo("-" * 100) finally: session.close() except Exception as e: click.echo(f"Error listing sites: {e}", err=True) raise click.Abort() @app.command("discover-s3-buckets") def discover_s3_buckets(): """Discover and register AWS S3 buckets as site deployments""" try: # Import here to avoid circular dependencies import subprocess import sys from pathlib import Path # Get the script path script_dir = Path(__file__).parent.parent.parent script_path = script_dir / "scripts" / "discover_s3_buckets.py" if not script_path.exists(): click.echo(f"Error: Discovery script not found at {script_path}", err=True) raise click.Abort() # Run the discovery script click.echo("Running S3 bucket discovery script...\n") result = subprocess.run([sys.executable, str(script_path)], check=False) if result.returncode != 0: click.echo(f"\nDiscovery script exited with code {result.returncode}", err=True) raise click.Abort() except FileNotFoundError: click.echo("Error: Discovery script not found", err=True) raise click.Abort() except Exception as e: click.echo(f"Error running discovery script: {e}", err=True) raise click.Abort() @app.command("get-site") @click.option("--domain", prompt=True, help="Custom domain to lookup") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") def get_site(domain: str, admin_user: Optional[str], admin_password: Optional[str]): """Get detailed information about a site deployment (requires admin)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # Get site details session = db_manager.get_session() try: deployment_repo = 
SiteDeploymentRepository(session) site = deployment_repo.get_by_hostname(domain) if not site: click.echo(f"Error: Site with domain '{domain}' not found", err=True) raise click.Abort() click.echo("\n" + "=" * 70) click.echo("Site Deployment Details") click.echo("=" * 70) click.echo(f"ID: {site.id}") click.echo(f"Site Name: {site.site_name}") click.echo(f"Custom Domain: {site.custom_hostname}") click.echo(f"\nStorage Zone:") click.echo(f" ID: {site.storage_zone_id}") click.echo(f" Name: {site.storage_zone_name}") click.echo(f" Region: {site.storage_zone_region}") click.echo(f" Password: {site.storage_zone_password}") click.echo(f"\nPull Zone:") click.echo(f" ID: {site.pull_zone_id}") click.echo(f" b-cdn Hostname: {site.pull_zone_bcdn_hostname}") click.echo(f"\nTimestamps:") click.echo(f" Created: {site.created_at.strftime('%Y-%m-%d %H:%M:%S')}") click.echo(f" Updated: {site.updated_at.strftime('%Y-%m-%d %H:%M:%S')}") click.echo("=" * 70) finally: session.close() except Exception as e: click.echo(f"Error getting site details: {e}", err=True) raise click.Abort() @app.command("remove-site") @click.option("--domain", prompt=True, help="Custom domain to remove") @click.option("--admin-user", help="Admin username for authentication") @click.option("--admin-password", help="Admin password for authentication") @click.confirmation_option(prompt="Are you sure you want to remove this site deployment record?") def remove_site(domain: str, admin_user: Optional[str], admin_password: Optional[str]): """Remove a site deployment record (requires admin)""" try: # Authenticate admin if not admin_user or not admin_password: admin_user, admin_password = prompt_admin_credentials() admin = authenticate_admin(admin_user, admin_password) if not admin: click.echo("Error: Authentication failed or insufficient permissions", err=True) raise click.Abort() # Remove site session = db_manager.get_session() try: deployment_repo = SiteDeploymentRepository(session) # Check if site exists site = 
@app.command("sync-sites")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.option("--dry-run", is_flag=True, help="Show what would be imported without making changes")
def sync_sites(admin_user: Optional[str], admin_password: Optional[str], dry_run: bool):
    """Sync existing bunny.net sites with custom domains to database (requires admin)"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Steps:
    #   1. Authenticate (prompting when either credential is missing).
    #   2. Fetch every Storage Zone and Pull Zone from bunny.net.
    #   3. For each Pull Zone linked to a known Storage Zone, record one site
    #      deployment per custom hostname, or a single b-cdn-only deployment
    #      when the zone has no custom hostname.
    #   4. Print a summary. With --dry-run nothing is written.
    #
    # Every failure path ends in click.Abort.
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get bunny.net API key
        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo("\nSyncing sites from bunny.net...")
        if dry_run:
            click.echo("DRY RUN MODE - No changes will be made\n")

        # Initialize bunny.net client
        client = BunnyNetClient(api_key)

        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Get all storage zones (with passwords!)
            click.echo("Fetching Storage Zones from bunny.net...")
            storage_zones = client.get_storage_zones()
            storage_zone_map = {zone["Id"]: zone for zone in storage_zones}
            click.echo(f" Found {len(storage_zones)} Storage Zones")

            # Get all pull zones
            click.echo("Fetching Pull Zones from bunny.net...")
            pull_zones = client.get_pull_zones()
            click.echo(f" Found {len(pull_zones)} Pull Zones")

            # Process pull zones with custom hostnames
            imported = 0
            skipped = 0
            errors = 0

            click.echo("\nProcessing Pull Zones with custom domains...")
            click.echo("=" * 80)

            for pz in pull_zones:
                # Skip pull zones that are not backed by a storage zone we know.
                storage_zone_id = pz.get("StorageZoneId")
                if not storage_zone_id:
                    continue
                storage_zone = storage_zone_map.get(storage_zone_id)
                if not storage_zone:
                    continue

                # Get pull zone details to see hostnames
                pz_details = client.get_pull_zone(pz["Id"])
                if not pz_details:
                    continue

                # ``or []`` also covers an explicit ``"Hostnames": null``.
                hostnames = pz_details.get("Hostnames") or []

                # Get the default b-cdn hostname, falling back to the
                # conventional "<zone name>.b-cdn.net".
                default_hostname = next(
                    (
                        h["Value"] for h in hostnames
                        if h.get("Value") and h["Value"].endswith(".b-cdn.net")
                    ),
                    f"{pz['Name']}.b-cdn.net"
                )

                # Filter for custom hostnames (not *.b-cdn.net)
                custom_hostnames = [
                    h["Value"] for h in hostnames
                    if h.get("Value") and not h["Value"].endswith(".b-cdn.net")
                ]

                # One entry per custom domain; a single b-cdn-only entry
                # when the pull zone has no custom domain at all.
                sites_to_import = (
                    [(ch, default_hostname) for ch in custom_hostnames]
                    or [(None, default_hostname)]
                )

                # Import each site deployment
                for custom_hostname, bcdn_hostname in sites_to_import:
                    # Bound before the try so the error handler below can
                    # always reference it.
                    check_hostname = custom_hostname or bcdn_hostname
                    try:
                        # Check if already exists
                        if deployment_repo.exists(check_hostname):
                            click.echo(f"SKIP: {check_hostname} (already in database)")
                            skipped += 1
                            continue

                        if dry_run:
                            click.echo(f"WOULD IMPORT: {check_hostname}")
                            click.echo(f" Storage Zone: {storage_zone['Name']} (Region: {storage_zone.get('Region', 'Unknown')})")
                            click.echo(f" Pull Zone: {pz['Name']} (ID: {pz['Id']})")
                            click.echo(f" b-cdn Hostname: {bcdn_hostname}")
                            if custom_hostname:
                                click.echo(f" Custom Domain: {custom_hostname}")
                            # Dry-run hits are reported under "Imported" in
                            # the summary as well.
                            imported += 1
                        else:
                            # Create site deployment
                            deployment = deployment_repo.create(
                                site_name=storage_zone['Name'],
                                storage_zone_id=storage_zone['Id'],
                                storage_zone_name=storage_zone['Name'],
                                storage_zone_password=storage_zone.get('Password', ''),
                                storage_zone_region=storage_zone.get('Region', ''),
                                pull_zone_id=pz['Id'],
                                pull_zone_bcdn_hostname=bcdn_hostname,
                                custom_hostname=custom_hostname
                            )

                            # Randomly assign template
                            template_service = TemplateService()
                            available_templates = template_service.get_available_templates()
                            if available_templates:
                                deployment.template_name = random.choice(available_templates)
                                session.commit()
                                session.refresh(deployment)

                            click.echo(f"IMPORTED: {check_hostname}")
                            click.echo(f" Storage Zone: {storage_zone['Name']} (Region: {storage_zone.get('Region', 'Unknown')})")
                            click.echo(f" Pull Zone: {pz['Name']} (ID: {pz['Id']})")
                            if custom_hostname:
                                click.echo(f" Custom Domain: {custom_hostname}")
                            click.echo(f" Template: {deployment.template_name}")
                            imported += 1

                    except Exception as e:
                        # Best effort: one bad site must not stop the sync.
                        # NOTE(review): if the session is left in a failed
                        # transaction here, later iterations may need a
                        # rollback first - confirm against the repository.
                        click.echo(f"ERROR importing {check_hostname}: {e}", err=True)
                        errors += 1

            click.echo("=" * 80)
            click.echo(f"\nSync Summary:")
            click.echo(f" Imported: {imported}")
            click.echo(f" Skipped (already exists): {skipped}")
            click.echo(f" Errors: {errors}")

            if dry_run:
                click.echo("\nDRY RUN complete - no changes were made")
                click.echo("Run without --dry-run to import these sites")

        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # for failures that were already reported.
        raise
    except Exception as e:
        click.echo(f"Error syncing sites: {e}", err=True)
        raise click.Abort()
@app.command()
@click.option('--file', '-f', 'file_path', required=True, type=click.Path(exists=True), help='Path to CORA .xlsx file')
@click.option('--name', '-n', required=True, help='Project name')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--tier1-branded-plus-ratio', '-bp', default=None, type=float, help='Ratio of branded+ anchor text for tier1 (optional, applied to remaining slots after branded)')
@click.option('--random-deployment-targets', '-r', type=int, help='Number of random deployment targets to select (default: random 2-3)')
@click.option('--tier1-count', type=int, help='Number of tier1 articles (default: random 10-12)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def ingest_cora(
    file_path: str,
    name: str,
    money_site_url: Optional[str],
    custom_anchors: Optional[str],
    tier1_branded_ratio: Optional[float],
    tier1_branded_plus_ratio: Optional[float],
    random_deployment_targets: Optional[int],
    tier1_count: Optional[int],
    username: Optional[str],
    password: Optional[str]
):
    """Ingest a CORA .xlsx report and create a new project"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Steps: authenticate -> parse the CORA file -> obtain and validate the
    # money site URL -> create the project -> resolve the optional
    # branded / branded+ anchor settings -> write the job file.
    #
    # NOTE(review): ``tier1_count`` is accepted as an option but is never
    # read in this function - confirm whether it should reach the job file.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            click.echo(f"Authenticated as: {user.username} ({user.role})")
            click.echo(f"\nParsing CORA file: {file_path}")

            # Split the comma-separated anchors, dropping blank entries.
            custom_anchor_list = []
            if custom_anchors:
                custom_anchor_list = [
                    anchor.strip() for anchor in custom_anchors.split(',') if anchor.strip()
                ]

            parser = CORAParser(file_path)
            cora_data = parser.parse(custom_anchor_text=custom_anchor_list)

            click.echo(f"Main Keyword: {cora_data['main_keyword']}")
            click.echo(f"Word Count: {cora_data['word_count']}")
            click.echo(f"Entities Found: {len(cora_data['entities'])}")
            click.echo(f"Related Searches: {len(cora_data['related_searches'])}")

            # Prompt for money_site_url if not provided
            if not money_site_url:
                money_site_url = click.prompt(
                    "\nEnter money site URL (required for tiered linking)",
                    type=str
                )

            # Validate money_site_url
            if not money_site_url.startswith(('http://', 'https://')):
                click.echo("Error: Money site URL must start with http:// or https://", err=True)
                raise click.Abort()

            # Clean up URL (remove trailing slash)
            money_site_url = money_site_url.rstrip('/')

            click.echo(f"\nCreating project: {name}")
            click.echo(f"Money Site URL: {money_site_url}")

            # Add money_site_url to cora_data
            cora_data['money_site_url'] = money_site_url

            project_repo = ProjectRepository(session)
            project = project_repo.create(
                user_id=user.id,
                name=name,
                data=cora_data
            )

            click.echo(f"\nSuccess: Project '{project.name}' created (ID: {project.id})")
            click.echo(f"Main Keyword: {project.main_keyword}")
            click.echo(f"Money Site URL: {project.money_site_url}")
            click.echo(f"Entities: {len(project.entities or [])}")
            click.echo(f"Related Searches: {len(project.related_searches or [])}")
            if project.custom_anchor_text:
                click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}")

            # Handle tier1 branded anchor text if ratio is specified
            tier1_branded_text = None
            brand_names = None
            if tier1_branded_ratio is not None and tier1_branded_ratio > 0:
                # Look up default brands from brand mapping
                default_brands = _get_brands_for_url(money_site_url)
                default_prompt = ""
                if default_brands:
                    default_prompt = f" [default: '{', '.join(default_brands)}'] (press Enter for default)"

                tier1_branded_text = click.prompt(
                    f"\nEnter branded anchor text (company name) for tier1 (comma-separated for multiple, e.g., 'AGI Fabricators, AGI'){default_prompt}",
                    type=str,
                    default=""
                ).strip()

                # Use defaults if Enter was pressed and defaults exist
                if not tier1_branded_text and default_brands:
                    tier1_branded_text = ", ".join(default_brands)
                    click.echo(f"Using default brands: {tier1_branded_text}")

                if not tier1_branded_text:
                    click.echo("Warning: Empty branded anchor text provided, skipping tier1 branded anchor text configuration.", err=True)
                    tier1_branded_text = None
                    tier1_branded_ratio = None
                else:
                    # Parse brand names for branded+ generation
                    brand_names = [
                        text.strip() for text in tier1_branded_text.split(',') if text.strip()
                    ]

            # Handle branded+ ratio if flag is provided
            if tier1_branded_plus_ratio is not None:
                # Validate the provided ratio: must lie in (0, 1].
                if tier1_branded_plus_ratio <= 0 or tier1_branded_plus_ratio > 1:
                    click.echo("Warning: Invalid branded+ ratio provided, skipping branded+ configuration.", err=True)
                    tier1_branded_plus_ratio = None
                elif not brand_names:
                    # If brand names weren't set from branded prompt, try to get them from brand lookup
                    default_brands = _get_brands_for_url(money_site_url)
                    if default_brands:
                        brand_names = default_brands
                        click.echo(f"Using brand names from mapping for branded+: {', '.join(brand_names)}")
                    else:
                        click.echo("Warning: No brand names available for branded+ (set --tier1-branded-ratio or add to brands.json). Skipping branded+ configuration.", err=True)
                        tier1_branded_plus_ratio = None

            job_file = create_job_file_for_project(
                project.id,
                project.name,
                session,
                tier1_branded_ratio=tier1_branded_ratio,
                tier1_branded_text=tier1_branded_text,
                tier1_branded_plus_ratio=tier1_branded_plus_ratio,
                brand_names=brand_names,
                random_deployment_targets=random_deployment_targets
            )
            if job_file:
                click.echo(f"Job file created: {job_file}")

        except CORAParseError as e:
            click.echo(f"Error parsing CORA file: {e}", err=True)
            raise click.Abort()
        except ValueError as e:
            click.echo(f"Error creating project: {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # for failures that were already reported.
        raise
    except Exception as e:
        click.echo(f"Error ingesting CORA file: {e}", err=True)
        raise click.Abort()
@app.command()
@click.option('--file', '-f', 'file_path', required=True, type=click.Path(exists=True), help='Path to simple .xlsx spreadsheet file')
@click.option('--name', '-n', help='Project name (overrides project_name from spreadsheet if provided)')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def ingest_simple(
    file_path: str,
    name: Optional[str],
    money_site_url: Optional[str],
    username: Optional[str],
    password: Optional[str]
):
    """Ingest a simple spreadsheet and create a new project

    Expected spreadsheet format:
    - First row: Headers (main_keyword, project_name, related_searches, entities)
    - Second row: Data values

    Required columns: main_keyword, project_name, related_searches, entities
    - main_keyword: Single phrase keyword
    - project_name: Name for the project
    - related_searches: Comma-delimited list (e.g., "term1, term2, term3")
    - entities: Comma-delimited list (e.g., "entity1, entity2, entity3")

    Optional columns (with defaults):
    - word_count: Default 1500
    - term_frequency: Default 3
    """
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text. Every failure path in the body ends in click.Abort.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            click.echo(f"Authenticated as: {user.username} ({user.role})")
            click.echo(f"\nParsing simple spreadsheet: {file_path}")

            parser = SimpleSpreadsheetParser(file_path)
            data = parser.parse()

            # --name on the command line wins over the spreadsheet value.
            project_name = name or data.get("project_name")
            if not project_name:
                click.echo("Error: Project name is required (provide via --name or in spreadsheet)", err=True)
                raise click.Abort()

            entities = data['entities']
            related_searches = data['related_searches']

            click.echo(f"Main Keyword: {data['main_keyword']}")
            click.echo(f"Project Name: {project_name}")
            click.echo(f"Word Count: {data['word_count']}")
            click.echo(f"Term Frequency: {data['term_frequency']}")
            click.echo(f"Entities: {len(entities)}")
            click.echo(f"Related Searches: {len(related_searches)}")

            # Preview at most five items of each list.
            if entities:
                click.echo(
                    f" Entities: {', '.join(entities[:5])}"
                    + (f" ... (+{len(entities) - 5} more)" if len(entities) > 5 else "")
                )
            if related_searches:
                click.echo(
                    f" Related Searches: {', '.join(related_searches[:5])}"
                    + (f" ... (+{len(related_searches) - 5} more)" if len(related_searches) > 5 else "")
                )

            if not money_site_url:
                money_site_url = click.prompt(
                    "\nEnter money site URL (required for tiered linking)",
                    type=str
                )

            if not money_site_url.startswith(('http://', 'https://')):
                click.echo("Error: Money site URL must start with http:// or https://", err=True)
                raise click.Abort()

            money_site_url = money_site_url.rstrip('/')

            click.echo(f"\nCreating project: {project_name}")
            click.echo(f"Money Site URL: {money_site_url}")

            data['money_site_url'] = money_site_url

            # project_name is passed separately as ``name``, so it is left
            # out of the data payload.
            project_data = {k: v for k, v in data.items() if k != 'project_name'}

            project_repo = ProjectRepository(session)
            project = project_repo.create(
                user_id=user.id,
                name=project_name,
                data=project_data
            )

            click.echo(f"\nSuccess: Project '{project.name}' created (ID: {project.id})")
            click.echo(f"Main Keyword: {project.main_keyword}")
            click.echo(f"Money Site URL: {project.money_site_url}")
            click.echo(f"Word Count: {project.word_count}")
            click.echo(f"Term Frequency: {project.term_frequency}")
            click.echo(f"Entities: {len(project.entities or [])}")
            click.echo(f"Related Searches: {len(project.related_searches or [])}")

        except CORAParseError as e:
            click.echo(f"Error parsing spreadsheet: {e}", err=True)
            raise click.Abort()
        except ValueError as e:
            click.echo(f"Error creating project: {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # for failures that were already reported.
        raise
    except Exception as e:
        click.echo(f"Error ingesting spreadsheet: {e}", err=True)
        raise click.Abort()
@app.command()
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def list_projects(username: Optional[str], password: Optional[str]):
    """List all projects for the authenticated user"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text. Admins see every project; other users only see their own.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            project_repo = ProjectRepository(session)
            if user.is_admin():
                projects = project_repo.get_all()
                click.echo(f"\nAll Projects (Admin View):")
            else:
                projects = project_repo.get_by_user_id(user.id)
                click.echo(f"\nYour Projects:")

            if not projects:
                click.echo("No projects found")
                return

            separator = "-" * 80
            click.echo(f"Total projects: {len(projects)}")
            click.echo(separator)
            click.echo(f"{'ID':<5} {'Name':<30} {'Keyword':<25} {'Created':<20}")
            click.echo(separator)

            for project in projects:
                created_str = project.created_at.strftime('%Y-%m-%d %H:%M:%S')
                # Name and keyword are truncated one character short of the
                # column width so adjacent columns never touch.
                click.echo(
                    f"{project.id:<5} {project.name[:29]:<30} "
                    f"{project.main_keyword[:24]:<25} {created_str:<20}"
                )

            click.echo(separator)

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # after an authentication failure that was already reported.
        raise
    except Exception as e:
        click.echo(f"Error listing projects: {e}", err=True)
        raise click.Abort()
@app.command("create-job")
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to create job file for')
@click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)')
@click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)')
@click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-pwd', help='Password for authentication')
def create_job(
    project_id: int,
    deployment_targets: tuple,
    tier1_count: int,
    tier2_count: int,
    tier1_branded_ratio: Optional[float],
    output: Optional[str],
    username: Optional[str],
    password: Optional[str]
):
    """Create a job file from an existing project ID"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Writes a JSON job description for ``project_id`` either to --output or
    # to jobs/<sanitized project name>.json. When the default file already
    # exists, a -YYMMDD suffix is added to the name.
    #
    # NOTE(review): ``tier1_branded_ratio`` is accepted as an option but is
    # never read in this function - confirm whether it should be written
    # into the job file.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            project_repo = ProjectRepository(session)
            project = project_repo.get_by_id(project_id)
            if not project:
                click.echo(f"Error: Project {project_id} not found", err=True)
                raise click.Abort()

            deployment_targets_list = list(deployment_targets) if deployment_targets else None

            if not deployment_targets_list:
                # No targets given: show a few known sites as a hint.
                site_repo = SiteDeploymentRepository(session)
                sites = site_repo.get_all()
                available_domains = [
                    site.custom_hostname for site in sites
                    if site.custom_hostname is not None
                ]
                if available_domains:
                    click.echo(f"Available sites: {', '.join(available_domains[:5])}{'...' if len(available_domains) > 5 else ''}")
                click.echo("Note: No deployment_targets specified. You can add them manually to the job file.")

            # Keep alphanumerics, '-' and '_'; every other character
            # (whitespace included) becomes '-'. Fall back to the project id
            # when nothing usable is left, so the file is never ".json".
            sanitized_name = "".join(
                c if c.isalnum() or c in ('-', '_') else '-'
                for c in project.name.lower()
            ).strip('-') or f"project-{project_id}"

            jobs_dir = Path("jobs")
            jobs_dir.mkdir(exist_ok=True)

            if output:
                filepath = Path(output)
                # Create missing parent directories so an --output path in a
                # new directory does not fail when the file is opened.
                filepath.parent.mkdir(parents=True, exist_ok=True)
            else:
                filepath = jobs_dir / f"{sanitized_name}.json"
                if filepath.exists():
                    # Avoid clobbering an existing job file of the same name.
                    date_suffix = datetime.now().strftime("%y%m%d")
                    filepath = jobs_dir / f"{sanitized_name}-{date_suffix}.json"

            tier1_config = {
                "count": tier1_count,
                "min_word_count": 1250,
                "max_word_count": 2000,
                "models": {
                    "title": "openai/gpt-4o-mini",
                    "outline": "openai/gpt-4o-mini",
                    "content": "anthropic/claude-3.5-sonnet"
                }
            }
            tier2_config = {
                "count": tier2_count,
                "min_word_count": 1000,
                "max_word_count": 1250,
                "models": {
                    "title": "openai/gpt-4o-mini",
                    "outline": "openai/gpt-4o-mini",
                    "content": "openai/gpt-4o-mini"
                },
                "interlinking": {
                    "links_per_article_min": 3,
                    "links_per_article_max": 6
                }
            }
            job_entry = {
                "project_id": project_id,
                "tiers": {
                    "tier1": tier1_config,
                    "tier2": tier2_config
                }
            }
            if deployment_targets_list:
                job_entry["deployment_targets"] = deployment_targets_list
            job_template = {"jobs": [job_entry]}

            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(job_template, f, indent=2)

            click.echo(f"\nJob file created: {filepath}")
            click.echo(f"Project: {project.name} (ID: {project_id})")
            click.echo(f"Tier1: {tier1_count} articles")
            click.echo(f"Tier2: {tier2_count} articles")
            if deployment_targets_list:
                click.echo(f"Deployment targets: {', '.join(deployment_targets_list)}")
            click.echo(f"\nTo run this job:")
            # The password is deliberately not echoed back to the terminal.
            click.echo(f" uv run python main.py generate-batch --job-file {filepath} -u {username} --password ")

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # for failures that were already reported.
        raise
    except Exception as e:
        click.echo(f"Error creating job file: {e}", err=True)
        raise click.Abort()
@app.command("generate-batch")
@click.option('--job-file', '-j', required=True, type=click.Path(exists=True), help='Path to job JSON file')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
@click.option('--debug', is_flag=True, help='Save AI responses to debug_output/')
@click.option('--continue-on-error', is_flag=True, help='Continue processing if article generation fails')
@click.option('--model', '-m', default='gpt-4o-mini', help='AI model to use (gpt-4o-mini, x-ai/grok-4-fast)')
def generate_batch(
    job_file: str,
    username: Optional[str],
    password: Optional[str],
    debug: bool,
    continue_on_error: bool,
    model: str
):
    """Generate content batch from job file"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Credentials are resolved in this order: command-line options, the
    # CLIENT_USERNAME / CLIENT_PASSWORD environment variables, then an
    # interactive prompt. After the batch has been processed the job file is
    # moved to jobs/done/.
    try:
        if not username:
            username = os.getenv("CLIENT_USERNAME")
        if not password:
            password = os.getenv("CLIENT_PASSWORD")
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            click.echo(f"Authenticated as: {user.username} ({user.role})")

            api_key = os.getenv("OPENROUTER_API_KEY")
            if not api_key:
                click.echo("Error: OPENROUTER_API_KEY not found in environment", err=True)
                click.echo("Please set OPENROUTER_API_KEY in your .env file", err=True)
                raise click.Abort()

            from src.generation.job_config import JobConfig

            job_config = JobConfig(job_file)
            jobs = job_config.get_jobs()

            # Warn only when the user passed a non-default --model that the
            # job file's per-stage model settings will override.
            has_models_in_job = any(job.models is not None for job in jobs)
            if has_models_in_job and model != 'gpt-4o-mini':
                click.echo(f"Warning: Job file contains per-stage model configuration.")
                click.echo(f" The --model flag will be ignored in favor of job config.\n")

            click.echo(f"Initializing AI client with default model: {model}")
            ai_client = AIClient(api_key=api_key, model=model)
            prompt_manager = PromptManager()

            project_repo = ProjectRepository(session)
            content_repo = GeneratedContentRepository(session)
            site_deployment_repo = SiteDeploymentRepository(session)

            content_generator = ContentGenerator(
                ai_client=ai_client,
                prompt_manager=prompt_manager,
                project_repo=project_repo,
                content_repo=content_repo
            )

            # The first job's max_workers, when set, overrides the
            # configured default.
            max_workers = get_concurrent_workers()
            job_max_workers = jobs[0].max_workers if jobs and jobs[0].max_workers else None
            final_max_workers = job_max_workers or max_workers

            batch_processor = BatchProcessor(
                content_generator=content_generator,
                content_repo=content_repo,
                project_repo=project_repo,
                site_deployment_repo=site_deployment_repo,
                max_workers=final_max_workers
            )

            click.echo(f"\nProcessing job file: {job_file}")
            click.echo(f"Concurrent workers: {final_max_workers}")
            if debug:
                click.echo("Debug mode: AI responses will be saved to debug_output/\n")

            batch_processor.process_job(
                job_file_path=job_file,
                debug=debug,
                continue_on_error=continue_on_error
            )

            # Archive the processed job file under jobs/done/.
            done_dir = os.path.join("jobs", "done")
            os.makedirs(done_dir, exist_ok=True)
            destination = os.path.join(done_dir, os.path.basename(job_file))
            if os.path.exists(job_file):
                # os.replace overwrites an existing destination on every
                # platform; os.rename raises on Windows when a job file of
                # the same name has already been archived.
                os.replace(job_file, destination)
                click.echo(f"\nJob file moved to: {destination}")

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # for failures that were already reported.
        raise
    except Exception as e:
        click.echo(f"Error processing batch: {e}", err=True)
        raise click.Abort()
@app.command("deploy-batch")
@click.option('--batch-id', '-b', required=True, type=int, help='Project/batch ID to deploy')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
# Declared as an on/off pair: a plain ``is_flag=True, default=True`` option
# can never be switched off. ``--continue-on-error`` keeps working as before.
@click.option('--continue-on-error/--stop-on-error', default=True, help='Continue if file fails (default: True)')
@click.option('--dry-run', is_flag=True, help='Preview what would be deployed')
def deploy_batch(
    batch_id: int,
    username: Optional[str],
    password: Optional[str],
    continue_on_error: bool,
    dry_run: bool
):
    """Deploy all content in a batch to cloud storage"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Requires admin credentials. With --dry-run the command only lists the
    # articles and site pages that would be uploaded. Otherwise it delegates
    # to DeploymentService and aborts (after printing the summary) when any
    # article or page failed.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        admin = authenticate_admin(username, password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        click.echo(f"Authenticated as: {admin.username} ({admin.role})")

        session = db_manager.get_session()
        try:
            project_repo = ProjectRepository(session)
            content_repo = GeneratedContentRepository(session)
            site_repo = SiteDeploymentRepository(session)
            page_repo = SitePageRepository(session)

            project = project_repo.get_by_id(batch_id)
            if not project:
                click.echo(f"Error: Project/batch {batch_id} not found", err=True)
                raise click.Abort()

            click.echo(f"\nDeploying batch: {project.name} (ID: {batch_id})")
            click.echo(f"Keyword: {project.main_keyword}")

            articles = content_repo.get_by_project_id(batch_id)
            click.echo(f"Found {len(articles)} articles")

            if dry_run:
                # Imported once here instead of on every loop iteration.
                from src.generation.url_generator import generate_file_path, generate_public_url

                click.echo("\nDRY RUN MODE - No files will be uploaded\n")
                for article in articles:
                    if not article.site_deployment_id:
                        click.echo(f"SKIP: Article {article.id} - No site assigned")
                        continue

                    site = site_repo.get_by_id(article.site_deployment_id)
                    if not site:
                        click.echo(f"SKIP: Article {article.id} - Site not found")
                        continue

                    file_path = generate_file_path(article)
                    url = generate_public_url(site, file_path)
                    click.echo(f"WOULD DEPLOY: {article.title[:50]}")
                    click.echo(f" File: {file_path}")
                    click.echo(f" URL: {url}")

                # Pages are listed once per distinct site used by the batch.
                site_ids = {a.site_deployment_id for a in articles if a.site_deployment_id}
                for site_id in site_ids:
                    for page in page_repo.get_by_site(site_id):
                        click.echo(f"WOULD DEPLOY: {page.page_type}.html")

                click.echo("\nDry run complete. Use without --dry-run to actually deploy.")
                return

            url_logger = URLLogger()
            deployment_service = DeploymentService(
                content_repo=content_repo,
                site_repo=site_repo,
                page_repo=page_repo,
                url_logger=url_logger
            )

            click.echo(f"\nStarting deployment...")
            click.echo(f"Continue on error: {continue_on_error}")
            click.echo("")

            results = deployment_service.deploy_batch(
                project_id=batch_id,
                continue_on_error=continue_on_error
            )

            click.echo("\n" + "=" * 70)
            click.echo("Deployment Summary")
            click.echo("=" * 70)
            click.echo(f"Articles deployed: {results['articles_deployed']}")
            click.echo(f"Articles failed: {results['articles_failed']}")
            click.echo(f"Pages deployed: {results['pages_deployed']}")
            click.echo(f"Pages failed: {results['pages_failed']}")
            click.echo(f"Total time: {results['total_time']:.1f}s")

            if results['errors']:
                click.echo("\nErrors:")
                for error in results['errors']:
                    if error['type'] == 'article':
                        click.echo(f" Article {error['id']} ({error.get('title', 'N/A')[:40]}): {error['error']}")
                    else:
                        click.echo(f" Page {error.get('page_type', 'N/A')} (Site {error.get('site_id')}): {error['error']}")

            click.echo("=" * 70)

            # Abort so scripts calling this command see the partial failure.
            if results['articles_failed'] > 0 or results['pages_failed'] > 0:
                raise click.Abort()

        except BunnyStorageError as e:
            click.echo(f"\nError: Storage upload failed - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # (e.g. right after the deployment summary).
        raise
    except Exception as e:
        click.echo(f"Error deploying batch: {e}", err=True)
        raise click.Abort()
@app.command("verify-deployment")
@click.option('--batch-id', '-b', required=True, type=int, help='Project/batch ID to verify')
@click.option('--sample', '-s', type=int, help='Number of random URLs to check (default: check all)')
@click.option('--timeout', '-t', type=int, default=10, help='Request timeout in seconds (default: 10)')
def verify_deployment(batch_id: int, sample: Optional[int], timeout: int):
    """Verify deployed URLs return 200 OK status"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Issues a GET (following redirects) for every deployed article URL of
    # the batch, or for a random sample of them, and aborts when any URL
    # does not answer with HTTP 200.
    try:
        if sample is not None and sample < 0:
            # random.sample would reject this with a far less helpful message.
            click.echo("Error: --sample cannot be negative", err=True)
            raise click.Abort()

        session = db_manager.get_session()
        try:
            content_repo = GeneratedContentRepository(session)
            project_repo = ProjectRepository(session)

            project = project_repo.get_by_id(batch_id)
            if not project:
                click.echo(f"Error: Project/batch {batch_id} not found", err=True)
                raise click.Abort()

            click.echo(f"Verifying deployment for batch: {project.name} (ID: {batch_id})")
            click.echo(f"Keyword: {project.main_keyword}\n")

            articles = content_repo.get_by_project_id(batch_id)
            deployed_articles = [
                a for a in articles
                if a.deployed_url and a.status == 'deployed'
            ]

            if not deployed_articles:
                click.echo("No deployed articles found for this batch.")
                return

            click.echo(f"Found {len(deployed_articles)} deployed articles")

            # A sample of 0, or one at least as large as the batch, means
            # "check everything".
            urls_to_check = deployed_articles
            if sample and sample < len(deployed_articles):
                urls_to_check = random.sample(deployed_articles, sample)
                click.echo(f"Checking random sample of {sample} URLs\n")
            else:
                click.echo(f"Checking all {len(deployed_articles)} URLs\n")

            successful = []
            failed = []

            for article in urls_to_check:
                url = article.deployed_url
                try:
                    response = requests.get(url, timeout=timeout, allow_redirects=True)
                except requests.exceptions.RequestException as e:
                    failed.append((url, article.title, str(e)))
                    click.echo(f"✗ {url} (Error: {type(e).__name__})")
                    continue

                if response.status_code == 200:
                    successful.append((url, article.title))
                    click.echo(f"✓ {url}")
                else:
                    failed.append((url, article.title, response.status_code))
                    click.echo(f"✗ {url} (HTTP {response.status_code})")

            click.echo("\n" + "=" * 70)
            click.echo("Verification Summary")
            click.echo("=" * 70)
            click.echo(f"Total checked: {len(urls_to_check)}")
            click.echo(f"Successful: {len(successful)}")
            click.echo(f"Failed: {len(failed)}")

            if failed:
                click.echo("\nFailed URLs:")
                for url, title, error in failed:
                    # Tolerate articles without a title in the report.
                    title = title or ""
                    title_preview = title[:50] + "..." if len(title) > 50 else title
                    click.echo(f" {url}")
                    click.echo(f" Title: {title_preview}")
                    click.echo(f" Error: {error}")

            click.echo("=" * 70)

            # Abort so scripts calling this command see the failure.
            if failed:
                raise click.Abort()

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # right after the verification summary.
        raise
    except Exception as e:
        click.echo(f"Error verifying deployment: {e}", err=True)
        raise click.Abort()
@app.command("get-links")
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to get links for')
@click.option('--tier', '-t', required=True, help='Tier to filter (e.g., "1" or "2+" for tier 2 and above)')
@click.option('--with-anchor-text', is_flag=True, help='Include anchor text used for tiered links')
@click.option('--with-destination-url', is_flag=True, help='Include destination URL that the article links to')
def get_links(project_id: int, tier: str, with_anchor_text: bool, with_destination_url: bool):
    """Export article URLs with optional link details for a project and tier"""
    # NOTE: the docstring above is rendered by Click as the command's --help
    # text, so implementation notes are kept in comments instead.
    #
    # Writes CSV to stdout with the columns article_url, tier, title and,
    # when requested, anchor_text and destination_url. When link columns are
    # requested, an article produces one row per tiered link, or a single
    # padded row when it has none. Messages go to stderr so the CSV on
    # stdout stays clean.
    import csv
    import sys
    from src.database.repositories import ArticleLinkRepository

    try:
        session = db_manager.get_session()
        try:
            content_repo = GeneratedContentRepository(session)
            project_repo = ProjectRepository(session)
            link_repo = ArticleLinkRepository(session)

            project = project_repo.get_by_id(project_id)
            if not project:
                click.echo(f"Error: Project {project_id} not found", err=True)
                raise click.Abort()

            # "N+" selects tier N and above; a bare "N" selects exactly N.
            tier_range_mode = tier.endswith('+')
            try:
                min_tier = int(tier[:-1] if tier_range_mode else tier)
            except ValueError:
                click.echo(f"Error: Invalid tier format '{tier}'. Use '1', '2', or '2+'", err=True)
                raise click.Abort()

            all_articles = content_repo.get_by_project_id(project_id)

            if tier_range_mode:
                # Tier values have the form "tier<N>"; strip the prefix to
                # compare numerically.
                articles = [
                    a for a in all_articles
                    if a.deployed_url and int(a.tier.replace('tier', '')) >= min_tier
                ]
            else:
                tier_str = f"tier{min_tier}"
                articles = [
                    a for a in all_articles
                    if a.deployed_url and a.tier == tier_str
                ]

            if not articles:
                click.echo(f"No deployed articles found for project {project_id} with tier filter '{tier}'", err=True)
                raise click.Abort()

            csv_writer = csv.writer(sys.stdout)

            header = ['article_url', 'tier', 'title']
            if with_anchor_text:
                header.append('anchor_text')
            if with_destination_url:
                header.append('destination_url')
            csv_writer.writerow(header)

            wants_link_columns = with_anchor_text or with_destination_url

            for article in articles:
                row = [article.deployed_url, article.tier, article.title]

                if not wants_link_columns:
                    csv_writer.writerow(row)
                    continue

                tiered_links = [
                    link for link in link_repo.get_by_source_article(article.id)
                    if link.link_type == 'tiered'
                ]

                if not tiered_links:
                    # Pad so every row matches the header width.
                    if with_anchor_text:
                        row.append('')
                    if with_destination_url:
                        row.append('')
                    csv_writer.writerow(row)
                    continue

                for link in tiered_links:
                    row_with_link = row.copy()
                    if with_anchor_text:
                        row_with_link.append(link.anchor_text or '')
                    if with_destination_url:
                        # Prefer the explicit URL; otherwise use the target
                        # article's deployed URL, or '' when unavailable.
                        if link.to_url:
                            row_with_link.append(link.to_url)
                        elif link.to_content_id:
                            target_article = content_repo.get_by_id(link.to_content_id)
                            row_with_link.append(
                                target_article.deployed_url
                                if target_article and target_article.deployed_url
                                else ''
                            )
                        else:
                            row_with_link.append('')
                    csv_writer.writerow(row_with_link)

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the
        # generic handler below would print a second, empty error message
        # for failures that were already reported.
        raise
    except Exception as e:
        click.echo(f"Error getting links: {e}", err=True)
        raise click.Abort()


if __name__ == "__main__":
    app()