""" S3 Bucket Discovery and Registration Script Discovers all AWS S3 buckets and allows interactive selection to register them as SiteDeployment records for use in the site assignment pool. """ import os import sys import hashlib import logging from typing import List, Dict, Optional from datetime import datetime import boto3 import click from botocore.exceptions import ClientError, BotoCoreError, NoCredentialsError # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from src.database.session import db_manager from src.database.repositories import SiteDeploymentRepository from src.deployment.s3_storage import map_aws_region_to_short_code logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class BucketInfo: """Information about an S3 bucket""" def __init__(self, name: str, region: str, creation_date: Optional[datetime] = None): self.name = name self.region = region self.creation_date = creation_date self.is_registered = False def __repr__(self): return f"BucketInfo(name={self.name}, region={self.region})" def get_s3_client(): """ Create and return a boto3 S3 client Raises: SystemExit: If AWS credentials are not found """ try: access_key = os.getenv('AWS_ACCESS_KEY_ID') secret_key = os.getenv('AWS_SECRET_ACCESS_KEY') if not access_key or not secret_key: click.echo("Error: AWS credentials not found.", err=True) click.echo("Please set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.", err=True) sys.exit(1) return boto3.client('s3') except Exception as e: click.echo(f"Error creating S3 client: {e}", err=True) sys.exit(1) def list_all_buckets(s3_client) -> List[BucketInfo]: """ List all S3 buckets and retrieve their metadata Args: s3_client: boto3 S3 client Returns: List of BucketInfo objects Raises: SystemExit: If unable to list buckets """ try: response = s3_client.list_buckets() buckets = [] for bucket in response.get('Buckets', []): bucket_name = bucket['Name'] creation_date = bucket.get('CreationDate') # Get bucket region try: region_response = s3_client.get_bucket_location(Bucket=bucket_name) region = region_response.get('LocationConstraint', 'us-east-1') # AWS returns None for us-east-1, so normalize it if region is None or region == '': region = 'us-east-1' except ClientError as e: error_code = e.response.get('Error', {}).get('Code', '') if error_code == 'AccessDenied': logger.warning(f"Access denied to get region for bucket {bucket_name}, using default") region = 'us-east-1' else: logger.warning(f"Could not get region for bucket {bucket_name}: {e}, using default") region = 'us-east-1' buckets.append(BucketInfo( name=bucket_name, region=region, creation_date=creation_date )) return buckets except NoCredentialsError: click.echo("Error: AWS credentials not found or invalid.", err=True) click.echo("Please configure AWS credentials using:", err=True) click.echo(" - Environment variables: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY", err=True) click.echo(" - AWS credentials file: ~/.aws/credentials", err=True) click.echo(" - IAM role (if running on EC2)", err=True) sys.exit(1) except ClientError as e: error_code = e.response.get('Error', {}).get('Code', '') error_message = e.response.get('Error', {}).get('Message', str(e)) click.echo(f"Error listing buckets: {error_code} - {error_message}", err=True) if error_code == 'AccessDenied': click.echo("Insufficient permissions. Ensure your AWS credentials have s3:ListAllMyBuckets permission.", err=True) sys.exit(1) except Exception as e: click.echo(f"Unexpected error listing buckets: {e}", err=True) sys.exit(1) def check_existing_deployments(site_repo: SiteDeploymentRepository, bucket_names: List[str]) -> Dict[str, bool]: """ Check which buckets are already registered in the database Args: site_repo: SiteDeploymentRepository instance bucket_names: List of bucket names to check Returns: Dictionary mapping bucket names to boolean (True if registered) """ existing = {} all_sites = site_repo.get_all() registered_buckets = { site.s3_bucket_name for site in all_sites if site.s3_bucket_name and site.storage_provider in ('s3', 's3_compatible') } for bucket_name in bucket_names: existing[bucket_name] = bucket_name in registered_buckets return existing def generate_unique_hostname(bucket_name: str, site_repo: SiteDeploymentRepository, attempt: int = 0) -> str: """ Generate a unique hostname for the pull_zone_bcdn_hostname field Args: bucket_name: S3 bucket name site_repo: SiteDeploymentRepository to check for existing hostnames attempt: Retry attempt number (for appending suffix) Returns: Unique hostname string """ if attempt == 0: base_hostname = f"s3-{bucket_name}.b-cdn.net" else: base_hostname = f"s3-{bucket_name}-{attempt}.b-cdn.net" # Check if hostname already exists existing = site_repo.get_by_bcdn_hostname(base_hostname) if existing is None: return base_hostname # Try again with incremented suffix return generate_unique_hostname(bucket_name, site_repo, attempt + 1) def generate_bucket_hash(bucket_name: str) -> int: """ Generate a numeric hash from bucket name for placeholder IDs Args: bucket_name: S3 bucket name Returns: Integer hash (positive, within reasonable range) """ hash_obj = hashlib.md5(bucket_name.encode()) hash_int = int(hash_obj.hexdigest(), 16) # Take modulo to keep it reasonable, but ensure it's positive return abs(hash_int % 1000000) def register_bucket( bucket_info: BucketInfo, site_repo: SiteDeploymentRepository, site_name: Optional[str] = None, custom_domain: Optional[str] = None ) -> bool: """ Register an S3 bucket as a SiteDeployment record Args: bucket_info: BucketInfo object with bucket details site_repo: SiteDeploymentRepository instance site_name: Optional site name (defaults to bucket name) custom_domain: Optional custom domain for S3 Returns: True if successful, False otherwise """ bucket_name = bucket_info.name bucket_region = bucket_info.region # Check if already registered all_sites = site_repo.get_all() for site in all_sites: if site.s3_bucket_name == bucket_name and site.storage_provider == 's3': click.echo(f" [SKIP] Bucket '{bucket_name}' is already registered (site_id={site.id})") return False # Generate placeholder values for Bunny.net fields bucket_hash = generate_bucket_hash(bucket_name) short_region = map_aws_region_to_short_code(bucket_region) unique_hostname = generate_unique_hostname(bucket_name, site_repo) # Use provided site_name or default to bucket name final_site_name = site_name or bucket_name try: deployment = site_repo.create( site_name=final_site_name, storage_provider='s3', storage_zone_id=bucket_hash, storage_zone_name=f"s3-{bucket_name}", storage_zone_password="s3-placeholder", storage_zone_region=short_region, pull_zone_id=bucket_hash, pull_zone_bcdn_hostname=unique_hostname, custom_hostname=None, s3_bucket_name=bucket_name, s3_bucket_region=bucket_region, s3_custom_domain=custom_domain, s3_endpoint_url=None ) click.echo(f" [OK] Registered bucket '{bucket_name}' as site_id={deployment.id}") return True except ValueError as e: click.echo(f" [ERROR] Failed to register bucket '{bucket_name}': {e}", err=True) return False except Exception as e: click.echo(f" [ERROR] Unexpected error registering bucket '{bucket_name}': {e}", err=True) return False def display_buckets(buckets: List[BucketInfo], existing_map: Dict[str, bool]): """ Display buckets in a formatted table Args: buckets: List of BucketInfo objects existing_map: Dictionary mapping bucket names to registration status """ click.echo("\n" + "=" * 80) click.echo("Available S3 Buckets") click.echo("=" * 80) click.echo(f"{'#':<4} {'Bucket Name':<40} {'Region':<15} {'Status':<15}") click.echo("-" * 80) for idx, bucket in enumerate(buckets, 1): bucket.is_registered = existing_map.get(bucket.name, False) status = "[REGISTERED]" if bucket.is_registered else "[AVAILABLE]" click.echo(f"{idx:<4} {bucket.name:<40} {bucket.region:<15} {status:<15}") click.echo("=" * 80) def main(): """Main entry point for the discovery script""" click.echo("S3 Bucket Discovery and Registration") click.echo("=" * 80) # Initialize database try: db_manager.initialize() except Exception as e: click.echo(f"Error initializing database: {e}", err=True) sys.exit(1) session = db_manager.get_session() site_repo = SiteDeploymentRepository(session) try: # Get S3 client click.echo("\nConnecting to AWS S3...") s3_client = get_s3_client() # List all buckets click.echo("Discovering S3 buckets...") buckets = list_all_buckets(s3_client) if not buckets: click.echo("No S3 buckets found in your AWS account.") return # Check which buckets are already registered bucket_names = [b.name for b in buckets] existing_map = check_existing_deployments(site_repo, bucket_names) # Display buckets display_buckets(buckets, existing_map) # Filter out already registered buckets available_buckets = [b for b in buckets if not existing_map.get(b.name, False)] if not available_buckets: click.echo("\nAll buckets are already registered.") return # Prompt for bucket selection click.echo(f"\nFound {len(available_buckets)} available bucket(s) to register.") click.echo("Enter bucket numbers to register (comma-separated, e.g., 1,3,5):") click.echo("Or press Enter to skip registration.") selection_input = click.prompt("Selection", default="", type=str).strip() if not selection_input: click.echo("No buckets selected. Exiting.") return # Parse selection try: selected_indices = [int(x.strip()) - 1 for x in selection_input.split(',')] except ValueError: click.echo("Error: Invalid selection format. Use comma-separated numbers (e.g., 1,3,5)", err=True) return # Validate indices valid_selections = [] for idx in selected_indices: if 0 <= idx < len(buckets): if buckets[idx].name in [b.name for b in available_buckets]: valid_selections.append(buckets[idx]) else: click.echo(f"Warning: Bucket #{idx + 1} is already registered, skipping.", err=True) else: click.echo(f"Warning: Invalid bucket number {idx + 1}, skipping.", err=True) if not valid_selections: click.echo("No valid buckets selected.") return # Register selected buckets click.echo(f"\nRegistering {len(valid_selections)} bucket(s)...") success_count = 0 for bucket_info in valid_selections: click.echo(f"\nRegistering bucket: {bucket_info.name}") # Prompt for site name default_site_name = bucket_info.name site_name = click.prompt("Site name", default=default_site_name, type=str).strip() if not site_name: site_name = default_site_name # Prompt for custom domain (optional) custom_domain = click.prompt( "Custom domain (optional, press Enter to skip)", default="", type=str ).strip() if not custom_domain: custom_domain = None # Confirm registration if click.confirm(f"Register '{bucket_info.name}' as '{site_name}'?"): if register_bucket(bucket_info, site_repo, site_name, custom_domain): success_count += 1 else: click.echo(f" [SKIP] Registration cancelled for '{bucket_info.name}'") click.echo(f"\n{'=' * 80}") click.echo(f"Registration complete: {success_count}/{len(valid_selections)} bucket(s) registered.") click.echo("=" * 80) except KeyboardInterrupt: click.echo("\n\nOperation cancelled by user.") sys.exit(0) except Exception as e: click.echo(f"\nUnexpected error: {e}", err=True) logger.exception("Unexpected error in bucket discovery") sys.exit(1) finally: session.close() if __name__ == "__main__": main()