""" AWS S3 Storage API client for uploading files to S3 buckets Story 6.2: AWS S3 Client Implementation """ import os import time import logging import json from typing import Optional, TYPE_CHECKING, Union from dataclasses import dataclass import boto3 from botocore.exceptions import ClientError, BotoCoreError from botocore.config import Config if TYPE_CHECKING: from src.database.models import SiteDeployment from src.deployment.bunny_storage import UploadResult logger = logging.getLogger(__name__) class S3StorageError(Exception): """Base exception for S3 Storage API errors""" pass class S3StorageAuthError(S3StorageError): """Authentication error with S3 Storage API""" pass class S3StorageClient: """Client for uploading files to AWS S3 buckets""" def __init__(self, max_retries: int = 3): """ Initialize S3 Storage client Args: max_retries: Maximum number of retry attempts for failed uploads """ self.max_retries = max_retries self._client_cache = {} def _get_s3_client(self, region: Optional[str] = None, endpoint_url: Optional[str] = None): """ Get or create boto3 S3 client with credentials from environment Args: region: AWS region (optional, uses AWS_REGION env var or default) endpoint_url: Custom endpoint URL for S3-compatible services Returns: boto3 S3 client instance Raises: S3StorageAuthError: If AWS credentials are missing """ # Create new client if endpoint_url changes (for s3_compatible) cache_key = f"{region or 'default'}:{endpoint_url or 'default'}" if not hasattr(self, '_client_cache'): self._client_cache = {} if cache_key not in self._client_cache: access_key = os.getenv('AWS_ACCESS_KEY_ID') secret_key = os.getenv('AWS_SECRET_ACCESS_KEY') default_region = os.getenv('AWS_REGION', 'us-east-1') if not access_key or not secret_key: raise S3StorageAuthError( "AWS credentials not found. Set AWS_ACCESS_KEY_ID and " "AWS_SECRET_ACCESS_KEY environment variables." ) region_to_use = region or default_region config = Config( retries={'max_attempts': self.max_retries, 'mode': 'adaptive'}, connect_timeout=60, read_timeout=60 ) client_kwargs = { 'aws_access_key_id': access_key, 'aws_secret_access_key': secret_key, 'region_name': region_to_use, 'config': config } if endpoint_url: client_kwargs['endpoint_url'] = endpoint_url client = boto3.client('s3', **client_kwargs) resource = boto3.resource('s3', **client_kwargs) self._client_cache[cache_key] = {'client': client, 'resource': resource} return self._client_cache[cache_key]['client'] def _get_s3_resource(self, region: Optional[str] = None, endpoint_url: Optional[str] = None): """Get or create boto3 S3 resource""" cache_key = f"{region or 'default'}:{endpoint_url or 'default'}" if not hasattr(self, '_client_cache'): self._client_cache = {} if cache_key not in self._client_cache: self._get_s3_client(region, endpoint_url) return self._client_cache[cache_key]['resource'] def _get_bucket_name(self, site: "SiteDeployment") -> str: """ Extract bucket name from SiteDeployment Args: site: SiteDeployment object Returns: S3 bucket name Raises: ValueError: If bucket name is not configured """ bucket_name = getattr(site, 's3_bucket_name', None) if not bucket_name: raise ValueError( "s3_bucket_name not configured for site. " "Set s3_bucket_name in SiteDeployment." ) return bucket_name def _get_bucket_region(self, site: "SiteDeployment") -> str: """ Extract bucket region from SiteDeployment or use default Args: site: SiteDeployment object Returns: AWS region string """ region = getattr(site, 's3_bucket_region', None) if region: return region return os.getenv('AWS_REGION', 'us-east-1') def _get_endpoint_url(self, site: "SiteDeployment") -> Optional[str]: """ Extract custom endpoint URL for S3-compatible services Args: site: SiteDeployment object Returns: Endpoint URL string or None for standard AWS S3 """ return getattr(site, 's3_endpoint_url', None) def _get_content_type(self, file_path: str) -> str: """ Determine content type based on file extension Args: file_path: File path Returns: MIME type string """ file_path_lower = file_path.lower() if file_path_lower.endswith('.html') or file_path_lower.endswith('.htm'): return 'text/html' elif file_path_lower.endswith('.css'): return 'text/css' elif file_path_lower.endswith('.js'): return 'application/javascript' elif file_path_lower.endswith('.json'): return 'application/json' elif file_path_lower.endswith('.xml'): return 'application/xml' elif file_path_lower.endswith('.png'): return 'image/png' elif file_path_lower.endswith('.jpg') or file_path_lower.endswith('.jpeg'): return 'image/jpeg' elif file_path_lower.endswith('.gif'): return 'image/gif' elif file_path_lower.endswith('.svg'): return 'image/svg+xml' else: return 'application/octet-stream' def _configure_bucket_public_read( self, bucket_name: str, region: str, endpoint_url: Optional[str] = None ): """ Configure S3 bucket for public read access only This method: 1. Disables "Block Public Access" settings for read access 2. Applies bucket policy for public read access 3. Validates configuration Args: bucket_name: S3 bucket name region: AWS region endpoint_url: Custom endpoint URL for S3-compatible services Raises: S3StorageError: If configuration fails """ try: s3_client = self._get_s3_client(region, endpoint_url) # Disable Block Public Access settings (required for public read) try: s3_client.put_public_access_block( Bucket=bucket_name, PublicAccessBlockConfiguration={ 'BlockPublicAcls': False, 'IgnorePublicAcls': False, 'BlockPublicPolicy': False, 'RestrictPublicBuckets': False } ) logger.info(f"Disabled Block Public Access settings for bucket {bucket_name}") except ClientError as e: error_code = e.response.get('Error', {}).get('Code', '') if error_code != 'NoSuchBucket': logger.warning(f"Could not update Block Public Access settings: {e}") # Apply bucket policy for public read access only bucket_policy = { "Version": "2012-10-17", "Statement": [ { "Sid": "PublicReadGetObject", "Effect": "Allow", "Principal": "*", "Action": "s3:GetObject", "Resource": f"arn:aws:s3:::{bucket_name}/*" } ] } try: s3_client.put_bucket_policy( Bucket=bucket_name, Policy=json.dumps(bucket_policy) ) logger.info(f"Applied public read bucket policy to {bucket_name}") except ClientError as e: error_code = e.response.get('Error', {}).get('Code', '') if error_code == 'NoSuchBucket': raise S3StorageError(f"Bucket {bucket_name} does not exist") logger.warning(f"Could not apply bucket policy: {e}") # Validate bucket exists try: s3_client.head_bucket(Bucket=bucket_name) except ClientError as e: error_code = e.response.get('Error', {}).get('Code', '') if error_code == '404': raise S3StorageError(f"Bucket {bucket_name} does not exist") elif error_code == '403': raise S3StorageAuthError( f"Access denied to bucket {bucket_name}. " f"Check AWS credentials and bucket permissions." ) raise S3StorageError(f"Failed to validate bucket: {e}") except BotoCoreError as e: raise S3StorageError(f"Failed to configure bucket: {str(e)}") def _generate_public_url( self, bucket_name: str, file_path: str, region: str, custom_domain: Optional[str] = None ) -> str: """ Generate public URL for uploaded file Args: bucket_name: S3 bucket name file_path: File path within bucket region: AWS region custom_domain: Optional custom domain (manual setup required) Returns: Public URL string """ if custom_domain: return f"https://{custom_domain.rstrip('/')}/{file_path}" # Virtual-hosted style URL (default for AWS S3) return f"https://{bucket_name}.s3.{region}.amazonaws.com/{file_path}" def upload_file( self, site: "SiteDeployment", file_path: str, content: Union[str, bytes] ) -> UploadResult: """ Upload a file to S3 bucket Args: site: SiteDeployment object with S3 configuration file_path: Path within bucket (e.g., 'my-article.html') content: File content to upload (str or bytes for binary files like images) Returns: UploadResult with success status and message Raises: S3StorageAuthError: If authentication fails S3StorageError: For other S3 errors ValueError: If required configuration is missing """ bucket_name = self._get_bucket_name(site) region = self._get_bucket_region(site) endpoint_url = self._get_endpoint_url(site) custom_domain = getattr(site, 's3_custom_domain', None) content_type = self._get_content_type(file_path) # Configure bucket for public read access on first upload attempt # This is idempotent and safe to call multiple times try: self._configure_bucket_public_read(bucket_name, region, endpoint_url) except S3StorageError as e: logger.warning(f"Bucket configuration warning: {e}") s3_client = self._get_s3_client(region, endpoint_url) # Handle both string and bytes content if isinstance(content, str): body = content.encode('utf-8') else: body = content # Track which buckets don't support ACLs to avoid retrying if not hasattr(self, '_buckets_no_acl'): self._buckets_no_acl = set() for attempt in range(self.max_retries): try: # Prepare upload parameters upload_kwargs = { 'Bucket': bucket_name, 'Key': file_path, 'Body': body, 'ContentType': content_type } # Only add ACL if bucket supports it if bucket_name not in self._buckets_no_acl: try: upload_kwargs['ACL'] = 'public-read' s3_client.put_object(**upload_kwargs) except ClientError as acl_error: acl_error_code = acl_error.response.get('Error', {}).get('Code', '') if acl_error_code == 'AccessControlListNotSupported': # Bucket doesn't support ACLs, retry without ACL # Bucket policy should handle public access self._buckets_no_acl.add(bucket_name) logger.info(f"Bucket {bucket_name} does not support ACLs, using bucket policy for public access") upload_kwargs.pop('ACL', None) s3_client.put_object(**upload_kwargs) else: raise else: # Bucket known to not support ACLs, upload without ACL s3_client.put_object(**upload_kwargs) public_url = self._generate_public_url( bucket_name, file_path, region, custom_domain ) logger.info(f"Uploaded {file_path} to s3://{bucket_name}/{file_path}") return UploadResult( success=True, file_path=file_path, message=f"Upload successful. Public URL: {public_url}" ) except ClientError as e: error_code = e.response.get('Error', {}).get('Code', '') error_message = e.response.get('Error', {}).get('Message', str(e)) # Handle specific error codes if error_code == 'NoSuchBucket': raise S3StorageError( f"Bucket {bucket_name} does not exist. " f"Create the bucket first or check bucket name." ) if error_code == '403' or error_code == 'AccessDenied': raise S3StorageAuthError( f"Access denied to bucket {bucket_name}. " f"Check AWS credentials and bucket permissions. " f"Error: {error_message}" ) if error_code == '404': raise S3StorageError( f"Bucket {bucket_name} not found in region {region}" ) # Retry on transient errors if attempt < self.max_retries - 1: wait_time = 2 ** attempt logger.warning( f"S3 upload failed (attempt {attempt + 1}/{self.max_retries}): " f"{error_code} - {error_message}. Retrying in {wait_time}s" ) time.sleep(wait_time) continue raise S3StorageError( f"S3 upload failed after {self.max_retries} attempts: " f"{error_code} - {error_message}" ) except BotoCoreError as e: if attempt < self.max_retries - 1: wait_time = 2 ** attempt logger.warning( f"S3 upload error (attempt {attempt + 1}/{self.max_retries}): " f"{str(e)}. Retrying in {wait_time}s" ) time.sleep(wait_time) continue raise S3StorageError( f"S3 upload failed after {self.max_retries} attempts: {str(e)}" ) raise S3StorageError(f"Upload failed after {self.max_retries} attempts") def map_aws_region_to_short_code(aws_region: str) -> str: """ Map AWS region code (e.g., 'us-east-1') to short region code used by the system Args: aws_region: AWS region code (e.g., 'us-east-1', 'eu-west-1') Returns: Short region code (e.g., 'US', 'EU') Note: Returns 'US' as default for unknown regions """ region_mapping = { # US regions 'us-east-1': 'US', 'us-east-2': 'US', 'us-west-1': 'US', 'us-west-2': 'US', # EU regions 'eu-west-1': 'EU', 'eu-west-2': 'EU', 'eu-west-3': 'EU', 'eu-central-1': 'EU', 'eu-north-1': 'EU', 'eu-south-1': 'EU', # Asia Pacific 'ap-southeast-1': 'SG', 'ap-southeast-2': 'SYD', 'ap-northeast-1': 'JP', 'ap-northeast-2': 'KR', 'ap-south-1': 'IN', # Other 'ca-central-1': 'CA', 'sa-east-1': 'SA', 'af-south-1': 'AF', 'me-south-1': 'ME', } return region_mapping.get(aws_region.lower(), 'US')