# Big-Link-Man/src/cli/commands.py
#
# 1945 lines
# 84 KiB
# Python

"""
CLI command definitions using Click
"""
import random
import click
from typing import Optional, List
from src.core.config import get_config, get_bunny_account_api_key, get_concurrent_workers
from src.auth.service import AuthService
from src.database.session import db_manager
from src.database.repositories import UserRepository, SiteDeploymentRepository, ProjectRepository
from src.database.models import User
from src.interlinking.anchor_text_generator import AnchorTextGenerator
from src.deployment.bunnynet import (
BunnyNetClient,
BunnyNetAPIError,
BunnyNetAuthError,
BunnyNetResourceConflictError
)
from src.ingestion.parser import CORAParser, CORAParseError, SimpleSpreadsheetParser
from src.generation.ai_client import AIClient, PromptManager
from src.generation.service import ContentGenerator
from src.generation.batch_processor import BatchProcessor
from src.database.repositories import GeneratedContentRepository, SitePageRepository
from src.deployment.bunny_storage import BunnyStorageError
from src.deployment.deployment_service import DeploymentService
from src.deployment.url_logger import URLLogger
from src.templating.service import TemplateService
from dotenv import load_dotenv
import os
import requests
import random
import json
from pathlib import Path
from datetime import datetime
# Load .env file at module level
load_dotenv()
def _get_brands_for_url(url: str) -> List[str]:
"""
Look up brand names for a given URL from brands.json
Args:
url: Money site URL (e.g., "https://www.gullco.com")
Returns:
List of brand names, or empty list if not found or file missing
"""
try:
from urllib.parse import urlparse
# Normalize URL: remove scheme, www., trailing slash
parsed = urlparse(url)
domain = parsed.netloc
# Remove www. prefix if present
if domain.startswith('www.'):
domain = domain[4:]
# Load brands.json from project root
brands_file = Path("brands.json")
if not brands_file.exists():
return []
with open(brands_file, 'r', encoding='utf-8') as f:
brands_data = json.load(f)
# Look up normalized domain
return brands_data.get(domain, [])
except Exception:
return []
def _build_tier1_anchor_terms(
    project,
    tier1_branded_ratio: Optional[float],
    tier1_branded_text: Optional[str],
    tier1_branded_plus_ratio: Optional[float],
    brand_names: Optional[List[str]],
) -> List[str]:
    """Build the explicit tier1 anchor list: branded terms, then branded+ terms, then regular terms."""
    # Pool of regular terms: custom anchors win, then related searches,
    # then variations generated from the project's keyword.
    if project.custom_anchor_text:
        keyword_variations = project.custom_anchor_text
    elif project.related_searches:
        keyword_variations = project.related_searches
    else:
        keyword_variations = AnchorTextGenerator()._generate_from_keyword(project, 10)

    # The size of that pool fixes how many anchor slots there are in total.
    actual_count = len(keyword_variations)

    # Parse comma-separated branded anchor texts. This happens BEFORE the
    # branded count is computed: input such as "," yields no usable text and
    # must not reserve slots (it used to end in a modulo-by-zero).
    branded_texts = [
        text.strip() for text in (tier1_branded_text or "").split(',') if text.strip()
    ]
    branded_count = 0
    if tier1_branded_ratio is not None and branded_texts:
        branded_count = int(actual_count * tier1_branded_ratio)
    remaining_count = actual_count - branded_count

    # Branded slots cycle through the supplied branded texts.
    anchor_terms = [branded_texts[i % len(branded_texts)] for i in range(branded_count)]

    # Branded+ terms combine every brand with every related search.
    branded_plus_count = 0
    if tier1_branded_plus_ratio is not None and brand_names:
        requested = int(remaining_count * tier1_branded_plus_ratio)
        related_searches = project.related_searches if project.related_searches else keyword_variations
        branded_plus_terms = []
        for brand in brand_names:
            for term in related_searches:
                branded_plus_terms.append(f"{brand} {term}")
                branded_plus_terms.append(f"{term} by {brand}")
        if branded_plus_terms:
            if requested > len(branded_plus_terms):
                selected_branded_plus = branded_plus_terms
            else:
                selected_branded_plus = random.sample(branded_plus_terms, requested)
            anchor_terms.extend(selected_branded_plus)
            # Count what was actually added so that a short branded+ pool
            # leaves its unused slots to the regular terms below.
            branded_plus_count = len(selected_branded_plus)

    # Whatever is left goes to regular terms, randomized when only a subset
    # of the pool is needed.
    regular_count = remaining_count - branded_plus_count
    if regular_count > 0:
        if regular_count < len(keyword_variations):
            anchor_terms.extend(random.sample(keyword_variations, regular_count))
        else:
            anchor_terms.extend(keyword_variations[:regular_count])
    return anchor_terms


def create_job_file_for_project(
    project_id: int,
    project_name: str,
    session,
    tier1_branded_ratio: Optional[float] = None,
    tier1_branded_text: Optional[str] = None,
    tier1_branded_plus_ratio: Optional[float] = None,
    brand_names: Optional[List[str]] = None,
    random_deployment_targets: Optional[int] = None,
    tier1_count: Optional[int] = None
) -> Optional[str]:
    """
    Create a job JSON file for a newly created project.

    The file is written to ``jobs/<sanitized project name>.json`` relative to
    the current working directory. If that file already exists, a ``-YYMMDD``
    date suffix is added to the name.

    Args:
        project_id: The ID of the created project
        project_name: The name of the project (for filename)
        session: Database session
        tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0)
        tier1_branded_text: Optional branded anchor text (company name) for tier1;
            several texts may be given comma-separated
        tier1_branded_plus_ratio: Optional ratio of branded+ anchor text for tier1
            (0.0-1.0, applied to remaining slots after branded)
        brand_names: Optional list of brand names for branded+ generation
        random_deployment_targets: Optional number of random deployment targets
            to select (default: random 2-3)
        tier1_count: Optional number of tier1 articles (default: random 10-12)

    Returns:
        Path to created file, or None if creation failed
    """
    try:
        deployment_repo = SiteDeploymentRepository(session)
        available_domains = [
            site.custom_hostname
            for site in deployment_repo.get_all()
            if site.custom_hostname is not None
        ]
        if not available_domains:
            click.echo("Warning: No domains with custom hostnames found. Job file not created.", err=True)
            return None

        # `tier1_count` used to be read here without being defined anywhere
        # in the function; the resulting NameError was swallowed by the
        # handler below, so no job file was ever written.
        t1_count = tier1_count if tier1_count is not None else random.randint(10, 12)
        t2_count = random.randint(30, 45)

        # Never ask random.sample() for more domains than exist.
        if random_deployment_targets is not None:
            num_targets = min(random_deployment_targets, len(available_domains))
        else:
            num_targets = min(random.randint(2, 3), len(available_domains))
        selected_domains = random.sample(available_domains, num_targets)

        # Anything that is not alphanumeric, '-' or '_' becomes '-'.
        sanitized_name = "".join(
            c if c.isalnum() or c in ('-', '_') else '-' for c in project_name.lower()
        ).strip('-')
        if not sanitized_name:
            # A name made only of punctuation would otherwise yield ".json".
            sanitized_name = f"project-{project_id}"

        jobs_dir = Path("jobs")
        jobs_dir.mkdir(exist_ok=True)
        filepath = jobs_dir / f"{sanitized_name}.json"
        if filepath.exists():
            date_suffix = datetime.now().strftime("%y%m%d")
            filepath = jobs_dir / f"{sanitized_name}-{date_suffix}.json"

        # Build tier1 configuration
        tier1_config = {
            "count": t1_count,
            "min_word_count": 1250,
            "max_word_count": 2000,
            "models": {
                "title": "openai/gpt-4o-mini",
                "outline": "openai/gpt-4o-mini",
                "content": "x-ai/grok-4-fast"
            }
        }

        # Add anchor_text_config if branded ratio/text or branded+ ratio is provided
        wants_branded = tier1_branded_ratio is not None and tier1_branded_text
        wants_branded_plus = tier1_branded_plus_ratio is not None and brand_names
        if wants_branded or wants_branded_plus:
            # The project supplies the keyword data for the non-branded terms
            project = ProjectRepository(session).get_by_id(project_id)
            if project and project.main_keyword:
                tier1_config["anchor_text_config"] = {
                    "mode": "explicit",
                    "terms": _build_tier1_anchor_terms(
                        project,
                        tier1_branded_ratio,
                        tier1_branded_text,
                        tier1_branded_plus_ratio,
                        brand_names,
                    )
                }

        job_template = {
            "jobs": [
                {
                    "project_id": project_id,
                    "deployment_targets": selected_domains,
                    "tiers": {
                        "tier1": tier1_config,
                        "tier2": {
                            "count": t2_count,
                            "min_word_count": 1000,
                            "max_word_count": 1250,
                            "models": {
                                "title": "openai/gpt-4o-mini",
                                "outline": "openai/gpt-4o-mini",
                                "content": "openai/gpt-4o-mini"
                            },
                            "interlinking": {
                                "links_per_article_min": 3,
                                "links_per_article_max": 6
                            }
                        }
                    }
                }
            ]
        }
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(job_template, f, indent=2)
        return str(filepath)
    except Exception as e:
        # Job-file creation is a convenience step: report and carry on
        # rather than failing the caller.
        click.echo(f"Warning: Failed to create job file: {e}", err=True)
        return None
def authenticate_admin(username: str, password: str) -> Optional[User]:
    """
    Check a username/password pair and require the admin role

    Args:
        username: The username to authenticate
        password: The password to authenticate

    Returns:
        The authenticated User when it is an admin, otherwise None
    """
    db_session = db_manager.get_session()
    try:
        service = AuthService(UserRepository(db_session))
        candidate = service.authenticate_user(username, password)
        # Failed authentication and non-admin accounts look the same to callers.
        if not candidate:
            return None
        return candidate if candidate.is_admin() else None
    finally:
        # The session is opened only for this check; always release it.
        db_session.close()
def prompt_admin_credentials() -> tuple[str, str]:
    """
    Obtain admin credentials, preferring the environment over prompting

    CLIENT_USERNAME and CLIENT_PASSWORD are read first; only the values that
    are missing are asked for interactively.

    Returns:
        Tuple of (username, password)
    """
    admin_name = os.getenv("CLIENT_USERNAME")
    admin_secret = os.getenv("CLIENT_PASSWORD")
    if not (admin_name and admin_secret):
        click.echo("Admin authentication required")
        # Prompt only for whichever half the environment did not supply.
        admin_name = admin_name or click.prompt("Username", type=str)
        admin_secret = admin_secret or click.prompt("Password", type=str, hide_input=True)
    return admin_name, admin_secret
# Root command group: every @app.command() in this module registers on it.
# The docstring is the group's help text, so it is kept verbatim.
@click.group()
@click.version_option(version="1.0.0")
def app():
    """Content Automation & Syndication Platform CLI"""
# Prints a summary of the loaded configuration. A loading failure is reported
# on stderr without aborting the command.
@app.command()
def config():
    """Show current configuration"""
    emit = click.echo
    try:
        cfg = get_config()
        application = cfg.application
        emit("Current Configuration:")
        emit(f"Application: {application.name} v{application.version}")
        emit(f"Environment: {application.environment}")
        emit(f"Database: {cfg.database.url}")
        emit(f"AI Model: {cfg.ai_service.model}")
        emit(f"Log Level: {cfg.logging.level}")
    except Exception as exc:
        emit(f"Error loading configuration: {exc}", err=True)
# Health check: the only probe is whether the configuration loads.
@app.command()
def health():
    """Check system health"""
    try:
        # Only success or failure of the load matters; the value is unused.
        get_config()
    except Exception as exc:
        click.echo(f"[ERROR] System health check failed: {exc}", err=True)
        raise click.Abort()
    else:
        for line in ("[OK] Configuration loaded successfully", "[OK] System is healthy"):
            click.echo(line)
# Lists the configured AI models and marks the one currently selected.
# Failures are reported on stderr without aborting the command.
@app.command()
def models():
    """List available AI models"""
    try:
        ai = get_config().ai_service
        click.echo("Available AI Models:")
        click.echo(f"Current: {ai.model}")
        click.echo(f"Provider: {ai.provider}")
        click.echo(f"Base URL: {ai.base_url}")
        click.echo("\nAvailable models:")
        for label, identifier in ai.available_models.items():
            if identifier == ai.model:
                marker = " (current)"
            else:
                marker = ""
            click.echo(f" {label}: {identifier}{marker}")
    except Exception as exc:
        click.echo(f"Error listing models: {exc}", err=True)
# Creates a user after verifying that the caller is an admin. Admin
# credentials come from the options, or else from prompt_admin_credentials().
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("add-user")
@click.option("--username", prompt=True, help="Username for the new user")
@click.option("--password", prompt=True, hide_input=True,
              confirmation_prompt=True, help="Password for the new user")
@click.option("--role", type=click.Choice(["Admin", "User"], case_sensitive=True),
              prompt=True, help="Role for the new user")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def add_user(username: str, password: str, role: str,
             admin_user: Optional[str], admin_password: Optional[str]):
    """Create a new user (requires admin authentication)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Create the new user
        session = db_manager.get_session()
        try:
            auth_service = AuthService(UserRepository(session))
            new_user = auth_service.create_user_with_hashed_password(
                username=username,
                password=password,
                role=role
            )
            click.echo(f"Success: User '{new_user.username}' created with role '{new_user.role}'")
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the generic
        # handler below would print a second, empty "Error creating user: "
        # line after the real message.
        raise
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error creating user: {e}", err=True)
        raise click.Abort()
# Deletes a user by username after admin authentication and an interactive
# confirmation. An admin cannot delete the account they authenticated with.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("delete-user")
@click.option("--username", prompt=True, help="Username to delete")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.confirmation_option(prompt="Are you sure you want to delete this user?")
def delete_user(username: str, admin_user: Optional[str],
                admin_password: Optional[str]):
    """Delete a user by username (requires admin authentication)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Prevent admin from deleting themselves
        if admin.username == username:
            click.echo("Error: Cannot delete your own account", err=True)
            raise click.Abort()

        # Delete the user
        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)

            # Check if user exists
            user_to_delete = user_repo.get_by_username(username)
            if not user_to_delete:
                click.echo(f"Error: User '{username}' not found", err=True)
                raise click.Abort()

            if user_repo.delete(user_to_delete.id):
                click.echo(f"Success: User '{username}' has been deleted")
            else:
                click.echo(f"Error: Failed to delete user '{username}'", err=True)
                raise click.Abort()
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error deleting user: " line.
        raise
    except Exception as e:
        click.echo(f"Error deleting user: {e}", err=True)
        raise click.Abort()
# Prints a table of all users (id, username, role, creation time) after
# admin authentication.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("list-users")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def list_users(admin_user: Optional[str], admin_password: Optional[str]):
    """List all users (requires admin authentication)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # List all users
        session = db_manager.get_session()
        try:
            users = UserRepository(session).get_all()
            if not users:
                click.echo("No users found")
                return

            rule = "-" * 60
            click.echo(f"\nTotal users: {len(users)}")
            click.echo(rule)
            click.echo(f"{'ID':<5} {'Username':<20} {'Role':<10} {'Created'}")
            click.echo(rule)
            for user in users:
                created = user.created_at.strftime("%Y-%m-%d %H:%M:%S")
                click.echo(f"{user.id:<5} {user.username:<20} {user.role:<10} {created}")
            click.echo(rule)
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # authentication error is not followed by an empty
        # "Error listing users: " line.
        raise
    except Exception as e:
        click.echo(f"Error listing users: {e}", err=True)
        raise click.Abort()
# Provisions a site on bunny.net in three API steps (Storage Zone, Pull Zone,
# custom hostname), stores the deployment record, assigns a random template
# when any are available, and prints the CNAME record the operator must
# create by hand.
# NOTE(review): if a later step fails, resources created by earlier steps are
# not removed here.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("provision-site")
@click.option("--name", prompt=True, help="Site name")
@click.option("--domain", prompt=True, help="Custom domain (FQDN, e.g., www.example.com)")
@click.option("--storage-name", prompt=True, help="Storage Zone name (must be globally unique)")
@click.option("--region", prompt=True, type=click.Choice(["DE", "NY", "LA", "SG", "SYD"]),
              help="Storage region")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def provision_site(name: str, domain: str, storage_name: str, region: str,
                   admin_user: Optional[str], admin_password: Optional[str]):
    """Provision a new site with Storage Zone and Pull Zone (requires admin)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get bunny.net API key
        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo(f"\nProvisioning site '{name}' with domain '{domain}'...")

        # Initialize bunny.net client
        client = BunnyNetClient(api_key)
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Check if domain already exists
            if deployment_repo.exists(domain):
                click.echo(f"Error: Site with domain '{domain}' already exists", err=True)
                raise click.Abort()

            # Step 1: Create Storage Zone
            click.echo(f"Step 1/3: Creating Storage Zone '{storage_name}' in region {region}...")
            storage_result = client.create_storage_zone(storage_name, region)
            click.echo(f" Storage Zone created: ID={storage_result.id}")

            # Step 2: Create Pull Zone
            pull_zone_name = f"{storage_name}-cdn"
            click.echo(f"Step 2/3: Creating Pull Zone '{pull_zone_name}'...")
            pull_result = client.create_pull_zone(pull_zone_name, storage_result.id)
            click.echo(f" Pull Zone created: ID={pull_result.id}, Hostname={pull_result.hostname}")

            # Step 3: Add Custom Hostname
            click.echo(f"Step 3/3: Adding custom hostname '{domain}'...")
            client.add_custom_hostname(pull_result.id, domain)
            click.echo(f" Custom hostname added successfully")

            # Save to database
            deployment = deployment_repo.create(
                site_name=name,
                custom_hostname=domain,
                storage_zone_id=storage_result.id,
                storage_zone_name=storage_result.name,
                storage_zone_password=storage_result.password,
                storage_zone_region=storage_result.region,
                pull_zone_id=pull_result.id,
                pull_zone_bcdn_hostname=pull_result.hostname
            )

            # Randomly assign template
            template_service = TemplateService()
            available_templates = template_service.get_available_templates()
            if available_templates:
                deployment.template_name = random.choice(available_templates)
                session.commit()
                session.refresh(deployment)
            click.echo(f" Template assigned: {deployment.template_name}")

            click.echo("\n" + "=" * 70)
            click.echo("Site provisioned successfully!")
            click.echo("=" * 70)
            click.echo("\nMANUAL DNS CONFIGURATION REQUIRED:")
            click.echo("You must create the following CNAME record with your domain registrar:\n")
            click.echo(f" Type: CNAME")
            # Host label is the first dot-separated part of the domain.
            subdomain = domain.split('.')[0] if '.' in domain else '@'
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nExample DNS configuration:")
            click.echo(f" Type: CNAME")
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nNote: DNS propagation may take up to 48 hours.")
            click.echo("=" * 70)
        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetResourceConflictError as e:
            click.echo(f"Error: Resource conflict - {e}", err=True)
            click.echo("Storage Zone or Pull Zone name already exists. Try a different name.", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error provisioning site: " line.
        raise
    except Exception as e:
        click.echo(f"Error provisioning site: {e}", err=True)
        raise click.Abort()
# Attaches a custom domain to an EXISTING bunny.net Storage Zone: looks the
# zone up by name, creates a new Pull Zone for it, adds the custom hostname,
# stores the deployment record, assigns a random template when any are
# available, and prints the CNAME record the operator must create by hand.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("attach-domain")
@click.option("--name", prompt=True, help="Site name")
@click.option("--domain", prompt=True, help="Custom domain (FQDN, e.g., www.example.com)")
@click.option("--storage-name", prompt=True, help="Existing Storage Zone name")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def attach_domain(name: str, domain: str, storage_name: str,
                  admin_user: Optional[str], admin_password: Optional[str]):
    """Attach a domain to an existing Storage Zone (requires admin)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get bunny.net API key
        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo(f"\nAttaching domain '{domain}' to existing Storage Zone '{storage_name}'...")

        # Initialize bunny.net client
        client = BunnyNetClient(api_key)
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Check if domain already exists
            if deployment_repo.exists(domain):
                click.echo(f"Error: Site with domain '{domain}' already exists", err=True)
                raise click.Abort()

            # Step 1: Find existing Storage Zone
            click.echo(f"Step 1/3: Finding Storage Zone '{storage_name}'...")
            storage_result = client.find_storage_zone_by_name(storage_name)
            if not storage_result:
                click.echo(f"Error: Storage Zone '{storage_name}' not found", err=True)
                raise click.Abort()
            click.echo(f" Storage Zone found: ID={storage_result.id}")

            # Step 2: Create Pull Zone (named after the zone and the domain,
            # with dots replaced by dashes)
            pull_zone_name = f"{storage_name}-{domain.replace('.', '-')}"
            click.echo(f"Step 2/3: Creating Pull Zone '{pull_zone_name}'...")
            pull_result = client.create_pull_zone(pull_zone_name, storage_result.id)
            click.echo(f" Pull Zone created: ID={pull_result.id}, Hostname={pull_result.hostname}")

            # Step 3: Add Custom Hostname
            click.echo(f"Step 3/3: Adding custom hostname '{domain}'...")
            client.add_custom_hostname(pull_result.id, domain)
            click.echo(f" Custom hostname added successfully")

            # Save to database
            deployment = deployment_repo.create(
                site_name=name,
                custom_hostname=domain,
                storage_zone_id=storage_result.id,
                storage_zone_name=storage_result.name,
                storage_zone_password=storage_result.password,
                storage_zone_region=storage_result.region,
                pull_zone_id=pull_result.id,
                pull_zone_bcdn_hostname=pull_result.hostname
            )

            # Randomly assign template
            template_service = TemplateService()
            available_templates = template_service.get_available_templates()
            if available_templates:
                deployment.template_name = random.choice(available_templates)
                session.commit()
                session.refresh(deployment)
            click.echo(f" Template assigned: {deployment.template_name}")

            click.echo("\n" + "=" * 70)
            click.echo("Domain attached successfully!")
            click.echo("=" * 70)
            click.echo("\nMANUAL DNS CONFIGURATION REQUIRED:")
            click.echo("You must create the following CNAME record with your domain registrar:\n")
            click.echo(f" Type: CNAME")
            # Host label is the first dot-separated part of the domain.
            subdomain = domain.split('.')[0] if '.' in domain else '@'
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nExample DNS configuration:")
            click.echo(f" Type: CNAME")
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nNote: DNS propagation may take up to 48 hours.")
            click.echo("=" * 70)
        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetResourceConflictError as e:
            click.echo(f"Error: Resource conflict - {e}", err=True)
            click.echo("Pull Zone name already exists. Try a different domain.", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error attaching domain: " line.
        raise
    except Exception as e:
        click.echo(f"Error attaching domain: {e}", err=True)
        raise click.Abort()
# Prints a table of all site deployment records after admin authentication.
# A missing custom hostname is shown as "N/A".
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("list-sites")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def list_sites(admin_user: Optional[str], admin_password: Optional[str]):
    """List all site deployments (requires admin)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # List all sites
        session = db_manager.get_session()
        try:
            sites = SiteDeploymentRepository(session).get_all()
            if not sites:
                click.echo("No site deployments found")
                return

            rule = "-" * 100
            click.echo(f"\nTotal sites: {len(sites)}")
            click.echo(rule)
            click.echo(f"{'ID':<5} {'Site Name':<25} {'Custom Domain':<30} {'Storage Zone':<20} {'Region':<8}")
            click.echo(rule)
            for site in sites:
                click.echo(f"{site.id:<5} {site.site_name:<25} {site.custom_hostname or 'N/A':<30} "
                           f"{site.storage_zone_name:<20} {site.storage_zone_region:<8}")
            click.echo(rule)
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # authentication error is not followed by an empty
        # "Error listing sites: " line.
        raise
    except Exception as e:
        click.echo(f"Error listing sites: {e}", err=True)
        raise click.Abort()
# Runs scripts/discover_s3_buckets.py (located three directory levels above
# this file) with the current Python interpreter and aborts when the script
# is missing or exits with a non-zero code.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("discover-s3-buckets")
def discover_s3_buckets():
    """Discover and register AWS S3 buckets as site deployments"""
    try:
        # Import here to avoid circular dependencies
        import subprocess
        import sys
        from pathlib import Path

        # Get the script path
        script_dir = Path(__file__).parent.parent.parent
        script_path = script_dir / "scripts" / "discover_s3_buckets.py"
        if not script_path.exists():
            click.echo(f"Error: Discovery script not found at {script_path}", err=True)
            raise click.Abort()

        # Run the discovery script; check=False because the exit code is
        # inspected explicitly below.
        click.echo("Running S3 bucket discovery script...\n")
        result = subprocess.run([sys.executable, str(script_path)], check=False)
        if result.returncode != 0:
            click.echo(f"\nDiscovery script exited with code {result.returncode}", err=True)
            raise click.Abort()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error running discovery script: " line.
        raise
    except FileNotFoundError:
        click.echo("Error: Discovery script not found", err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error running discovery script: {e}", err=True)
        raise click.Abort()
# Prints every stored field of one site deployment, looked up by custom
# domain, after admin authentication.
# NOTE(review): the Storage Zone password is printed in clear text; confirm
# that this is intended for terminal output.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("get-site")
@click.option("--domain", prompt=True, help="Custom domain to lookup")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def get_site(domain: str, admin_user: Optional[str], admin_password: Optional[str]):
    """Get detailed information about a site deployment (requires admin)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get site details
        session = db_manager.get_session()
        try:
            site = SiteDeploymentRepository(session).get_by_hostname(domain)
            if not site:
                click.echo(f"Error: Site with domain '{domain}' not found", err=True)
                raise click.Abort()

            click.echo("\n" + "=" * 70)
            click.echo("Site Deployment Details")
            click.echo("=" * 70)
            click.echo(f"ID: {site.id}")
            click.echo(f"Site Name: {site.site_name}")
            click.echo(f"Custom Domain: {site.custom_hostname}")
            click.echo(f"\nStorage Zone:")
            click.echo(f" ID: {site.storage_zone_id}")
            click.echo(f" Name: {site.storage_zone_name}")
            click.echo(f" Region: {site.storage_zone_region}")
            click.echo(f" Password: {site.storage_zone_password}")
            click.echo(f"\nPull Zone:")
            click.echo(f" ID: {site.pull_zone_id}")
            click.echo(f" b-cdn Hostname: {site.pull_zone_bcdn_hostname}")
            click.echo(f"\nTimestamps:")
            click.echo(f" Created: {site.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
            click.echo(f" Updated: {site.updated_at.strftime('%Y-%m-%d %H:%M:%S')}")
            click.echo("=" * 70)
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error getting site details: " line.
        raise
    except Exception as e:
        click.echo(f"Error getting site details: {e}", err=True)
        raise click.Abort()
# Deletes one site deployment RECORD (looked up by custom domain) after admin
# authentication and an interactive confirmation. The bunny.net resources are
# left untouched, as the success message tells the operator.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("remove-site")
@click.option("--domain", prompt=True, help="Custom domain to remove")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.confirmation_option(prompt="Are you sure you want to remove this site deployment record?")
def remove_site(domain: str, admin_user: Optional[str], admin_password: Optional[str]):
    """Remove a site deployment record (requires admin)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Remove site
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Check if site exists
            site = deployment_repo.get_by_hostname(domain)
            if not site:
                click.echo(f"Error: Site with domain '{domain}' not found", err=True)
                raise click.Abort()

            # Delete the site
            if deployment_repo.delete(site.id):
                click.echo(f"Success: Site deployment record for '{domain}' has been removed")
                click.echo("\nNote: This does NOT delete resources from bunny.net.")
                click.echo("You must manually delete the Storage Zone and Pull Zone if needed.")
            else:
                click.echo(f"Error: Failed to remove site '{domain}'", err=True)
                raise click.Abort()
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error removing site: " line.
        raise
    except Exception as e:
        click.echo(f"Error removing site: {e}", err=True)
        raise click.Abort()
# Imports existing bunny.net Pull Zones into the local database. Each Pull
# Zone linked to a known Storage Zone yields one record per custom hostname,
# or a single b-cdn-only record when it has none. Hostnames already in the
# database are skipped; with --dry-run nothing is written.
# The docstring doubles as the command's --help text and is kept verbatim.
@app.command("sync-sites")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.option("--dry-run", is_flag=True, help="Show what would be imported without making changes")
def sync_sites(admin_user: Optional[str], admin_password: Optional[str], dry_run: bool):
    """Sync existing bunny.net sites with custom domains to database (requires admin)"""
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()
        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get bunny.net API key
        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo("\nSyncing sites from bunny.net...")
        if dry_run:
            click.echo("DRY RUN MODE - No changes will be made\n")

        # Initialize bunny.net client
        client = BunnyNetClient(api_key)
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Get all storage zones (with passwords!)
            click.echo("Fetching Storage Zones from bunny.net...")
            storage_zones = client.get_storage_zones()
            storage_zone_map = {zone["Id"]: zone for zone in storage_zones}
            click.echo(f" Found {len(storage_zones)} Storage Zones")

            # Get all pull zones
            click.echo("Fetching Pull Zones from bunny.net...")
            pull_zones = client.get_pull_zones()
            click.echo(f" Found {len(pull_zones)} Pull Zones")

            # Process pull zones with custom hostnames
            imported = 0
            skipped = 0
            errors = 0
            click.echo("\nProcessing Pull Zones with custom domains...")
            click.echo("=" * 80)
            for pz in pull_zones:
                # Skip if not linked to storage zone
                if not pz.get("StorageZoneId"):
                    continue
                storage_zone = storage_zone_map.get(pz["StorageZoneId"])
                if not storage_zone:
                    continue

                # Get pull zone details to see hostnames
                pz_details = client.get_pull_zone(pz["Id"])
                if not pz_details:
                    continue
                hostnames = pz_details.get("Hostnames", [])

                # Get the default b-cdn hostname, falling back to the
                # conventional "<pull zone name>.b-cdn.net"
                default_hostname = next(
                    (h["Value"] for h in hostnames if h.get("Value") and h["Value"].endswith(".b-cdn.net")),
                    f"{pz['Name']}.b-cdn.net"
                )

                # Filter for custom hostnames (not *.b-cdn.net)
                custom_hostnames = [
                    h["Value"] for h in hostnames
                    if h.get("Value") and not h["Value"].endswith(".b-cdn.net")
                ]

                # Sites to import: one per custom domain, or a single
                # b-cdn-only entry when there are no custom domains
                if custom_hostnames:
                    sites_to_import = [(ch, default_hostname) for ch in custom_hostnames]
                else:
                    sites_to_import = [(None, default_hostname)]

                # Import each site deployment
                for custom_hostname, bcdn_hostname in sites_to_import:
                    try:
                        # Check if already exists
                        check_hostname = custom_hostname or bcdn_hostname
                        if deployment_repo.exists(check_hostname):
                            click.echo(f"SKIP: {check_hostname} (already in database)")
                            skipped += 1
                            continue

                        if dry_run:
                            click.echo(f"WOULD IMPORT: {check_hostname}")
                            click.echo(f" Storage Zone: {storage_zone['Name']} (Region: {storage_zone.get('Region', 'Unknown')})")
                            click.echo(f" Pull Zone: {pz['Name']} (ID: {pz['Id']})")
                            click.echo(f" b-cdn Hostname: {bcdn_hostname}")
                            if custom_hostname:
                                click.echo(f" Custom Domain: {custom_hostname}")
                            imported += 1
                        else:
                            # Create site deployment
                            deployment = deployment_repo.create(
                                site_name=storage_zone['Name'],
                                storage_zone_id=storage_zone['Id'],
                                storage_zone_name=storage_zone['Name'],
                                storage_zone_password=storage_zone.get('Password', ''),
                                storage_zone_region=storage_zone.get('Region', ''),
                                pull_zone_id=pz['Id'],
                                pull_zone_bcdn_hostname=bcdn_hostname,
                                custom_hostname=custom_hostname
                            )

                            # Randomly assign template
                            template_service = TemplateService()
                            available_templates = template_service.get_available_templates()
                            if available_templates:
                                deployment.template_name = random.choice(available_templates)
                                session.commit()
                                session.refresh(deployment)

                            click.echo(f"IMPORTED: {check_hostname}")
                            click.echo(f" Storage Zone: {storage_zone['Name']} (Region: {storage_zone.get('Region', 'Unknown')})")
                            click.echo(f" Pull Zone: {pz['Name']} (ID: {pz['Id']})")
                            if custom_hostname:
                                click.echo(f" Custom Domain: {custom_hostname}")
                            click.echo(f" Template: {deployment.template_name}")
                            imported += 1
                    except Exception as e:
                        # One failing site must not stop the rest of the sync.
                        # NOTE(review): if `session` is a SQLAlchemy session, a
                        # failed create/commit may need session.rollback()
                        # before the next iteration - confirm.
                        click.echo(f"ERROR importing {check_hostname}: {e}", err=True)
                        errors += 1

            click.echo("=" * 80)
            click.echo(f"\nSync Summary:")
            click.echo(f" Imported: {imported}")
            click.echo(f" Skipped (already exists): {skipped}")
            click.echo(f" Errors: {errors}")
            if dry_run:
                click.echo("\nDRY RUN complete - no changes were made")
                click.echo("Run without --dry-run to import these sites")
        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()
    except click.Abort:
        # click.Abort subclasses RuntimeError; re-raise it untouched so the
        # specific message already printed is not followed by an empty
        # "Error syncing sites: " line.
        raise
    except Exception as e:
        click.echo(f"Error syncing sites: {e}", err=True)
        raise click.Abort()
@app.command()
@click.option('--file', '-f', 'file_path', required=True, type=click.Path(exists=True), help='Path to CORA .xlsx file')
@click.option('--name', '-n', required=True, help='Project name')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--tier1-branded-plus-ratio', '-bp', default=None, type=float, help='Ratio of branded+ anchor text for tier1 (optional, applied to remaining slots after branded)')
@click.option('--random-deployment-targets', '-r', type=int, help='Number of random deployment targets to select (default: random 2-3)')
@click.option('--tier1-count', type=int, help='Number of tier1 articles (default: random 10-12)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def ingest_cora(
    file_path: str,
    name: str,
    money_site_url: Optional[str],
    custom_anchors: Optional[str],
    tier1_branded_ratio: Optional[float],
    tier1_branded_plus_ratio: Optional[float],
    random_deployment_targets: Optional[int],
    tier1_count: Optional[int],
    username: Optional[str],
    password: Optional[str],
):
    """Ingest a CORA .xlsx report and create a new project"""
    # Click renders the docstring above as this command's --help text, so the
    # detailed notes are kept in comments.
    #
    # Flow: authenticate -> parse the CORA report -> resolve and validate the
    # money site URL -> create the project -> optionally collect branded and
    # branded+ anchor settings -> write a job file through
    # create_job_file_for_project().
    #
    # Parameters (all populated by the Click options declared above):
    #   file_path: path to an existing CORA .xlsx file.
    #   name: name of the project to create.
    #   money_site_url: prompted for when omitted; must start with http(s)://.
    #   custom_anchors: comma-separated custom anchor texts (blank items dropped).
    #   tier1_branded_ratio: when > 0, the user is prompted for brand text.
    #   tier1_branded_plus_ratio: accepted only in (0, 1]; otherwise a warning
    #       is printed and the value is dropped.
    #   random_deployment_targets: forwarded unchanged to the job-file helper.
    #   tier1_count: NOTE(review): accepted here but never read in this body or
    #       forwarded to create_job_file_for_project() - confirm whether it
    #       should be passed along.
    #   username / password: credentials; prompted for when either is missing.
    #
    # Every failure path prints a message to stderr and raises click.Abort.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()
            click.echo(f"Authenticated as: {user.username} ({user.role})")

            click.echo(f"\nParsing CORA file: {file_path}")
            custom_anchor_list = []
            if custom_anchors:
                custom_anchor_list = [anchor.strip() for anchor in custom_anchors.split(',') if anchor.strip()]

            parser = CORAParser(file_path)
            cora_data = parser.parse(custom_anchor_text=custom_anchor_list)
            click.echo(f"Main Keyword: {cora_data['main_keyword']}")
            click.echo(f"Word Count: {cora_data['word_count']}")
            click.echo(f"Entities Found: {len(cora_data['entities'])}")
            click.echo(f"Related Searches: {len(cora_data['related_searches'])}")

            # Prompt for money_site_url if not provided
            if not money_site_url:
                money_site_url = click.prompt(
                    "\nEnter money site URL (required for tiered linking)",
                    type=str
                )

            # Validate money_site_url
            if not money_site_url.startswith(('http://', 'https://')):
                click.echo("Error: Money site URL must start with http:// or https://", err=True)
                raise click.Abort()

            # Clean up URL (remove trailing slash)
            money_site_url = money_site_url.rstrip('/')

            click.echo(f"\nCreating project: {name}")
            click.echo(f"Money Site URL: {money_site_url}")

            # Add money_site_url to cora_data
            cora_data['money_site_url'] = money_site_url

            project_repo = ProjectRepository(session)
            project = project_repo.create(
                user_id=user.id,
                name=name,
                data=cora_data
            )

            click.echo(f"\nSuccess: Project '{project.name}' created (ID: {project.id})")
            click.echo(f"Main Keyword: {project.main_keyword}")
            click.echo(f"Money Site URL: {project.money_site_url}")
            click.echo(f"Entities: {len(project.entities or [])}")
            click.echo(f"Related Searches: {len(project.related_searches or [])}")
            if project.custom_anchor_text:
                click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}")

            # Handle tier1 branded anchor text if ratio is specified
            tier1_branded_text = None
            brand_names = None
            if tier1_branded_ratio is not None and tier1_branded_ratio > 0:
                # Look up default brands from brand mapping
                default_brands = _get_brands_for_url(money_site_url)
                default_prompt = ""
                if default_brands:
                    default_prompt = f" [default: '{', '.join(default_brands)}'] (press Enter for default)"

                tier1_branded_text = click.prompt(
                    f"\nEnter branded anchor text (company name) for tier1 (comma-separated for multiple, e.g., 'AGI Fabricators, AGI'){default_prompt}",
                    type=str,
                    default=""
                ).strip()

                # Use defaults if Enter was pressed and defaults exist
                if not tier1_branded_text and default_brands:
                    tier1_branded_text = ", ".join(default_brands)
                    click.echo(f"Using default brands: {tier1_branded_text}")

                if not tier1_branded_text:
                    click.echo("Warning: Empty branded anchor text provided, skipping tier1 branded anchor text configuration.", err=True)
                    tier1_branded_text = None
                    tier1_branded_ratio = None
                else:
                    # Parse brand names for branded+ generation
                    brand_names = [text.strip() for text in tier1_branded_text.split(',') if text.strip()]

            # Handle branded+ ratio if flag is provided
            if tier1_branded_plus_ratio is not None:
                # Validate the provided ratio
                if tier1_branded_plus_ratio <= 0 or tier1_branded_plus_ratio > 1:
                    click.echo("Warning: Invalid branded+ ratio provided, skipping branded+ configuration.", err=True)
                    tier1_branded_plus_ratio = None
                elif not brand_names:
                    # If brand names weren't set from branded prompt, try to get them from brand lookup
                    default_brands = _get_brands_for_url(money_site_url)
                    if default_brands:
                        brand_names = default_brands
                        click.echo(f"Using brand names from mapping for branded+: {', '.join(brand_names)}")
                    else:
                        click.echo("Warning: No brand names available for branded+ (set --tier1-branded-ratio or add to brands.json). Skipping branded+ configuration.", err=True)
                        tier1_branded_plus_ratio = None

            job_file = create_job_file_for_project(
                project.id,
                project.name,
                session,
                tier1_branded_ratio=tier1_branded_ratio,
                tier1_branded_text=tier1_branded_text,
                tier1_branded_plus_ratio=tier1_branded_plus_ratio,
                brand_names=brand_names,
                random_deployment_targets=random_deployment_targets
            )
            if job_file:
                click.echo(f"Job file created: {job_file}")

        except CORAParseError as e:
            click.echo(f"Error parsing CORA file: {e}", err=True)
            raise click.Abort()
        except ValueError as e:
            click.echo(f"Error creating project: {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the generic
        # handler below would catch our own deliberate aborts and print a
        # second, empty "Error ingesting CORA file: " line.
        raise
    except Exception as e:
        click.echo(f"Error ingesting CORA file: {e}", err=True)
        raise click.Abort()
@app.command()
@click.option('--file', '-f', 'file_path', required=True, type=click.Path(exists=True), help='Path to simple .xlsx spreadsheet file')
@click.option('--name', '-n', help='Project name (overrides project_name from spreadsheet if provided)')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def ingest_simple(file_path: str, name: Optional[str], money_site_url: Optional[str], username: Optional[str], password: Optional[str]):
    """Ingest a simple spreadsheet and create a new project
    Expected spreadsheet format:
    - First row: Headers (main_keyword, project_name, related_searches, entities)
    - Second row: Data values
    Required columns: main_keyword, project_name, related_searches, entities
    - main_keyword: Single phrase keyword
    - project_name: Name for the project
    - related_searches: Comma-delimited list (e.g., "term1, term2, term3")
    - entities: Comma-delimited list (e.g., "entity1, entity2, entity3")
    Optional columns (with defaults):
    - word_count: Default 1500
    - term_frequency: Default 3
    """
    # Click renders the docstring above as --help text, so implementation
    # notes are kept in comments.
    #
    # Flow: authenticate -> parse the spreadsheet -> pick the project name
    # (--name wins over the spreadsheet value) -> resolve and validate the
    # money site URL -> create the project from every parsed field except
    # 'project_name'.
    #
    # Every failure path prints a message to stderr and raises click.Abort.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()
            click.echo(f"Authenticated as: {user.username} ({user.role})")

            click.echo(f"\nParsing simple spreadsheet: {file_path}")
            parser = SimpleSpreadsheetParser(file_path)
            data = parser.parse()

            project_name = name or data.get("project_name")
            if not project_name:
                click.echo("Error: Project name is required (provide via --name or in spreadsheet)", err=True)
                raise click.Abort()

            entities = data['entities']
            related_searches = data['related_searches']

            click.echo(f"Main Keyword: {data['main_keyword']}")
            click.echo(f"Project Name: {project_name}")
            click.echo(f"Word Count: {data['word_count']}")
            click.echo(f"Term Frequency: {data['term_frequency']}")
            click.echo(f"Entities: {len(entities)}")
            click.echo(f"Related Searches: {len(related_searches)}")

            # Preview at most five items of each list, noting how many were cut.
            if entities:
                entities_more = f" ... (+{len(entities) - 5} more)" if len(entities) > 5 else ""
                click.echo(f" Entities: {', '.join(entities[:5])}" + entities_more)
            if related_searches:
                searches_more = f" ... (+{len(related_searches) - 5} more)" if len(related_searches) > 5 else ""
                click.echo(f" Related Searches: {', '.join(related_searches[:5])}" + searches_more)

            if not money_site_url:
                money_site_url = click.prompt(
                    "\nEnter money site URL (required for tiered linking)",
                    type=str
                )

            if not money_site_url.startswith(('http://', 'https://')):
                click.echo("Error: Money site URL must start with http:// or https://", err=True)
                raise click.Abort()

            # Strip any trailing slash before storing the URL.
            money_site_url = money_site_url.rstrip('/')

            click.echo(f"\nCreating project: {project_name}")
            click.echo(f"Money Site URL: {money_site_url}")

            data['money_site_url'] = money_site_url
            # The name is passed separately, so it is left out of the data payload.
            project_data = {k: v for k, v in data.items() if k != 'project_name'}

            project_repo = ProjectRepository(session)
            project = project_repo.create(
                user_id=user.id,
                name=project_name,
                data=project_data
            )

            click.echo(f"\nSuccess: Project '{project.name}' created (ID: {project.id})")
            click.echo(f"Main Keyword: {project.main_keyword}")
            click.echo(f"Money Site URL: {project.money_site_url}")
            click.echo(f"Word Count: {project.word_count}")
            click.echo(f"Term Frequency: {project.term_frequency}")
            click.echo(f"Entities: {len(project.entities or [])}")
            click.echo(f"Related Searches: {len(project.related_searches or [])}")

        except CORAParseError as e:
            click.echo(f"Error parsing spreadsheet: {e}", err=True)
            raise click.Abort()
        except ValueError as e:
            click.echo(f"Error creating project: {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; let deliberate aborts through so
        # the generic handler does not print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error ingesting spreadsheet: {e}", err=True)
        raise click.Abort()
@app.command()
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def list_projects(username: Optional[str], password: Optional[str]):
    """List all projects for the authenticated user"""
    # Click renders the docstring above as --help text, so notes live here.
    #
    # Admin users (user.is_admin()) see every project; everyone else sees only
    # the projects returned for their own user id. Output is a fixed-width
    # table with name and keyword truncated to fit their columns.
    #
    # Authentication failure or any unexpected error prints a message to
    # stderr and raises click.Abort.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            project_repo = ProjectRepository(session)
            if user.is_admin():
                projects = project_repo.get_all()
                click.echo(f"\nAll Projects (Admin View):")
            else:
                projects = project_repo.get_by_user_id(user.id)
                click.echo(f"\nYour Projects:")

            if not projects:
                click.echo("No projects found")
                return

            separator = "-" * 80
            click.echo(f"Total projects: {len(projects)}")
            click.echo(separator)
            click.echo(f"{'ID':<5} {'Name':<30} {'Keyword':<25} {'Created':<20}")
            click.echo(separator)
            for project in projects:
                created_str = project.created_at.strftime('%Y-%m-%d %H:%M:%S')
                click.echo(f"{project.id:<5} {project.name[:29]:<30} {project.main_keyword[:24]:<25} {created_str:<20}")
            click.echo(separator)

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; let deliberate aborts through so
        # the generic handler does not print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error listing projects: {e}", err=True)
        raise click.Abort()
@app.command("create-job")
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to create job file for')
@click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)')
@click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)')
@click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-pwd', help='Password for authentication')
def create_job(
    project_id: int,
    deployment_targets: tuple,
    tier1_count: int,
    tier2_count: int,
    tier1_branded_ratio: Optional[float],
    output: Optional[str],
    username: Optional[str],
    password: Optional[str]
):
    """Create a job file from an existing project ID"""
    # Click renders the docstring above as --help text, so notes live here.
    #
    # Flow: authenticate -> load the project -> choose the output path ->
    # write a two-tier job template as JSON -> print a summary and the command
    # needed to run the job.
    #
    # Parameters (all populated by the Click options declared above):
    #   project_id: id of the project the job refers to.
    #   deployment_targets: hostnames written to "deployment_targets"; when
    #       empty, the key is omitted and known custom hostnames are listed
    #       as a hint.
    #   tier1_count / tier2_count: article counts written into the template.
    #   tier1_branded_ratio: NOTE(review): accepted but never read in this
    #       body - confirm whether it should be written into the job file.
    #   output: explicit output path; otherwise jobs/<sanitized-name>.json,
    #       with a -YYMMDD suffix when that file already exists.
    #   username / password: credentials; prompted for when either is missing.
    #
    # Every failure path prints a message to stderr and raises click.Abort.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            project_repo = ProjectRepository(session)
            project = project_repo.get_by_id(project_id)
            if not project:
                click.echo(f"Error: Project {project_id} not found", err=True)
                raise click.Abort()

            deployment_targets_list = list(deployment_targets) if deployment_targets else None
            if not deployment_targets_list:
                site_repo = SiteDeploymentRepository(session)
                sites = site_repo.get_all()
                available_domains = [
                    site.custom_hostname
                    for site in sites
                    if site.custom_hostname is not None
                ]
                if available_domains:
                    click.echo(f"Available sites: {', '.join(available_domains[:5])}{'...' if len(available_domains) > 5 else ''}")
                click.echo("Note: No deployment_targets specified. You can add them manually to the job file.")

            # Build a filesystem-friendly slug: anything that is not
            # alphanumeric, '-' or '_' becomes '-'.
            sanitized_name = "".join(c if c.isalnum() or c in ('-', '_') else '-' for c in project.name.lower()).strip('-')
            sanitized_name = '-'.join(sanitized_name.split())

            jobs_dir = Path("jobs")
            jobs_dir.mkdir(exist_ok=True)

            if output:
                filepath = Path(output)
            else:
                base_filename = f"{sanitized_name}.json"
                filepath = jobs_dir / base_filename
                if filepath.exists():
                    # Avoid clobbering the default file by adding today's date.
                    date_suffix = datetime.now().strftime("%y%m%d")
                    base_filename = f"{sanitized_name}-{date_suffix}.json"
                    filepath = jobs_dir / base_filename

            job_template = {
                "jobs": [
                    {
                        "project_id": project_id,
                        "tiers": {
                            "tier1": {
                                "count": tier1_count,
                                "min_word_count": 1250,
                                "max_word_count": 2000,
                                "models": {
                                    "title": "openai/gpt-4o-mini",
                                    "outline": "openai/gpt-4o-mini",
                                    "content": "anthropic/claude-3.5-sonnet"
                                }
                            },
                            "tier2": {
                                "count": tier2_count,
                                "min_word_count": 1000,
                                "max_word_count": 1250,
                                "models": {
                                    "title": "openai/gpt-4o-mini",
                                    "outline": "openai/gpt-4o-mini",
                                    "content": "openai/gpt-4o-mini"
                                },
                                "interlinking": {
                                    "links_per_article_min": 3,
                                    "links_per_article_max": 6
                                }
                            }
                        }
                    }
                ]
            }

            if deployment_targets_list:
                job_template["jobs"][0]["deployment_targets"] = deployment_targets_list

            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(job_template, f, indent=2)

            click.echo(f"\nJob file created: {filepath}")
            click.echo(f"Project: {project.name} (ID: {project_id})")
            click.echo(f"Tier1: {tier1_count} articles")
            click.echo(f"Tier2: {tier2_count} articles")
            if deployment_targets_list:
                click.echo(f"Deployment targets: {', '.join(deployment_targets_list)}")
            click.echo(f"\nTo run this job:")
            click.echo(f" uv run python main.py generate-batch --job-file {filepath} -u {username} --password <password>")

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; let deliberate aborts through so
        # the generic handler does not print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error creating job file: {e}", err=True)
        raise click.Abort()
@app.command("generate-batch")
@click.option('--job-file', '-j', required=True, type=click.Path(exists=True),
              help='Path to job JSON file')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
@click.option('--debug', is_flag=True, help='Save AI responses to debug_output/')
@click.option('--continue-on-error', is_flag=True,
              help='Continue processing if article generation fails')
@click.option('--model', '-m', default='gpt-4o-mini',
              help='AI model to use (gpt-4o-mini, x-ai/grok-4-fast)')
def generate_batch(
    job_file: str,
    username: Optional[str],
    password: Optional[str],
    debug: bool,
    continue_on_error: bool,
    model: str
):
    """Generate content batch from job file"""
    # Click renders the docstring above as --help text, so notes live here.
    #
    # Flow: resolve credentials (options -> CLIENT_USERNAME / CLIENT_PASSWORD
    # environment variables -> interactive prompt) -> authenticate -> require
    # OPENROUTER_API_KEY -> load the job config -> build the generator and the
    # batch processor -> process the job -> move the job file to jobs/done/.
    #
    # The worker count comes from the first job's max_workers when set,
    # otherwise from get_concurrent_workers().
    #
    # Every failure path prints a message to stderr and raises click.Abort.
    try:
        if not username:
            username = os.getenv("CLIENT_USERNAME")
        if not password:
            password = os.getenv("CLIENT_PASSWORD")
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)
            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()
            click.echo(f"Authenticated as: {user.username} ({user.role})")

            api_key = os.getenv("OPENROUTER_API_KEY")
            if not api_key:
                click.echo("Error: OPENROUTER_API_KEY not found in environment", err=True)
                click.echo("Please set OPENROUTER_API_KEY in your .env file", err=True)
                raise click.Abort()

            from src.generation.job_config import JobConfig

            job_config = JobConfig(job_file)
            jobs = job_config.get_jobs()

            # Warn when an explicit --model is given alongside per-stage models
            # in the job file.
            has_models_in_job = any(job.models is not None for job in jobs)
            if has_models_in_job and model != 'gpt-4o-mini':
                click.echo(f"Warning: Job file contains per-stage model configuration.")
                click.echo(f" The --model flag will be ignored in favor of job config.\n")

            click.echo(f"Initializing AI client with default model: {model}")
            ai_client = AIClient(api_key=api_key, model=model)
            prompt_manager = PromptManager()

            project_repo = ProjectRepository(session)
            content_repo = GeneratedContentRepository(session)
            site_deployment_repo = SiteDeploymentRepository(session)

            content_generator = ContentGenerator(
                ai_client=ai_client,
                prompt_manager=prompt_manager,
                project_repo=project_repo,
                content_repo=content_repo
            )

            max_workers = get_concurrent_workers()
            job_max_workers = jobs[0].max_workers if jobs and jobs[0].max_workers else None
            final_max_workers = job_max_workers or max_workers

            batch_processor = BatchProcessor(
                content_generator=content_generator,
                content_repo=content_repo,
                project_repo=project_repo,
                site_deployment_repo=site_deployment_repo,
                max_workers=final_max_workers
            )

            click.echo(f"\nProcessing job file: {job_file}")
            click.echo(f"Concurrent workers: {final_max_workers}")
            if debug:
                click.echo("Debug mode: AI responses will be saved to debug_output/\n")

            batch_processor.process_job(
                job_file_path=job_file,
                debug=debug,
                continue_on_error=continue_on_error
            )

            # Archive the processed job file under jobs/done/.
            done_dir = os.path.join("jobs", "done")
            os.makedirs(done_dir, exist_ok=True)
            job_path = job_file
            job_filename = os.path.basename(job_path)
            destination = os.path.join(done_dir, job_filename)
            if os.path.exists(job_path):
                # os.replace overwrites an existing destination on every
                # platform; os.rename raises FileExistsError on Windows, which
                # would report a finished batch as failed.
                os.replace(job_path, destination)
                click.echo(f"\nJob file moved to: {destination}")

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; let deliberate aborts through so
        # the generic handler does not print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error processing batch: {e}", err=True)
        raise click.Abort()
@app.command("deploy-batch")
@click.option('--batch-id', '-b', required=True, type=int, help='Project/batch ID to deploy')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
@click.option('--continue-on-error/--no-continue-on-error', default=True,
              help='Continue if file fails (default: True)')
@click.option('--dry-run', is_flag=True, help='Preview what would be deployed')
def deploy_batch(
    batch_id: int,
    username: Optional[str],
    password: Optional[str],
    continue_on_error: bool,
    dry_run: bool
):
    """Deploy all content in a batch to cloud storage"""
    # Click renders the docstring above as --help text, so notes live here.
    #
    # Flow: authenticate as admin -> load the project -> either print a
    # dry-run preview (articles with an assigned, existing site, plus the
    # pages of every site in use) or run DeploymentService.deploy_batch() and
    # print a summary.
    #
    # --continue-on-error used to be a plain flag that defaulted to True and
    # so could never be switched off. It is now an on/off pair; the default
    # and the existing --continue-on-error spelling are unchanged.
    #
    # The command aborts when any article or page failed to deploy, on
    # authentication failure, on an unknown batch id, or on any other error.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        admin = authenticate_admin(username, password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()
        click.echo(f"Authenticated as: {admin.username} ({admin.role})")

        session = db_manager.get_session()
        try:
            project_repo = ProjectRepository(session)
            content_repo = GeneratedContentRepository(session)
            site_repo = SiteDeploymentRepository(session)
            page_repo = SitePageRepository(session)

            project = project_repo.get_by_id(batch_id)
            if not project:
                click.echo(f"Error: Project/batch {batch_id} not found", err=True)
                raise click.Abort()

            click.echo(f"\nDeploying batch: {project.name} (ID: {batch_id})")
            click.echo(f"Keyword: {project.main_keyword}")

            articles = content_repo.get_by_project_id(batch_id)
            click.echo(f"Found {len(articles)} articles")

            if dry_run:
                # Imported once here rather than on every loop iteration.
                from src.generation.url_generator import generate_file_path, generate_public_url

                click.echo("\nDRY RUN MODE - No files will be uploaded\n")
                for article in articles:
                    if not article.site_deployment_id:
                        click.echo(f"SKIP: Article {article.id} - No site assigned")
                        continue
                    site = site_repo.get_by_id(article.site_deployment_id)
                    if not site:
                        click.echo(f"SKIP: Article {article.id} - Site not found")
                        continue
                    file_path = generate_file_path(article)
                    url = generate_public_url(site, file_path)
                    click.echo(f"WOULD DEPLOY: {article.title[:50]}")
                    click.echo(f" File: {file_path}")
                    click.echo(f" URL: {url}")

                site_ids = set(a.site_deployment_id for a in articles if a.site_deployment_id)
                for site_id in site_ids:
                    pages = page_repo.get_by_site(site_id)
                    for page in pages:
                        click.echo(f"WOULD DEPLOY: {page.page_type}.html")

                click.echo("\nDry run complete. Use without --dry-run to actually deploy.")
                return

            url_logger = URLLogger()
            deployment_service = DeploymentService(
                content_repo=content_repo,
                site_repo=site_repo,
                page_repo=page_repo,
                url_logger=url_logger
            )

            click.echo(f"\nStarting deployment...")
            click.echo(f"Continue on error: {continue_on_error}")
            click.echo("")

            results = deployment_service.deploy_batch(
                project_id=batch_id,
                continue_on_error=continue_on_error
            )

            click.echo("\n" + "=" * 70)
            click.echo("Deployment Summary")
            click.echo("=" * 70)
            click.echo(f"Articles deployed: {results['articles_deployed']}")
            click.echo(f"Articles failed: {results['articles_failed']}")
            click.echo(f"Pages deployed: {results['pages_deployed']}")
            click.echo(f"Pages failed: {results['pages_failed']}")
            click.echo(f"Total time: {results['total_time']:.1f}s")

            if results['errors']:
                click.echo("\nErrors:")
                for error in results['errors']:
                    if error['type'] == 'article':
                        click.echo(f" Article {error['id']} ({error.get('title', 'N/A')[:40]}): {error['error']}")
                    else:
                        click.echo(f" Page {error.get('page_type', 'N/A')} (Site {error.get('site_id')}): {error['error']}")

            click.echo("=" * 70)

            # Abort so that a partially failed deployment is visible to the caller.
            if results['articles_failed'] > 0 or results['pages_failed'] > 0:
                raise click.Abort()

        except BunnyStorageError as e:
            click.echo(f"\nError: Storage upload failed - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; let deliberate aborts through so
        # the generic handler does not print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error deploying batch: {e}", err=True)
        raise click.Abort()
@app.command("verify-deployment")
@click.option('--batch-id', '-b', required=True, type=int, help='Project/batch ID to verify')
@click.option('--sample', '-s', type=int, help='Number of random URLs to check (default: check all)')
@click.option('--timeout', '-t', type=int, default=10, help='Request timeout in seconds (default: 10)')
def verify_deployment(batch_id: int, sample: Optional[int], timeout: int):
    """Verify deployed URLs return 200 OK status"""
    # Click renders the docstring above as --help text, so notes live here.
    #
    # Checks every article of the batch that has a deployed_url and status
    # 'deployed' (or a random sample of them when --sample is smaller than
    # that set) with an HTTP GET that follows redirects. Anything other than
    # status 200, or a requests exception, counts as a failure.
    #
    # The command aborts when at least one URL failed, when the batch id is
    # unknown, or on any unexpected error.
    try:
        session = db_manager.get_session()
        try:
            content_repo = GeneratedContentRepository(session)
            project_repo = ProjectRepository(session)

            project = project_repo.get_by_id(batch_id)
            if not project:
                click.echo(f"Error: Project/batch {batch_id} not found", err=True)
                raise click.Abort()

            click.echo(f"Verifying deployment for batch: {project.name} (ID: {batch_id})")
            click.echo(f"Keyword: {project.main_keyword}\n")

            articles = content_repo.get_by_project_id(batch_id)
            deployed_articles = [a for a in articles if a.deployed_url and a.status == 'deployed']
            if not deployed_articles:
                click.echo("No deployed articles found for this batch.")
                return

            click.echo(f"Found {len(deployed_articles)} deployed articles")

            urls_to_check = deployed_articles
            if sample and sample < len(deployed_articles):
                urls_to_check = random.sample(deployed_articles, sample)
                click.echo(f"Checking random sample of {sample} URLs\n")
            else:
                click.echo(f"Checking all {len(deployed_articles)} URLs\n")

            successful = []
            failed = []
            for article in urls_to_check:
                url = article.deployed_url
                try:
                    response = requests.get(url, timeout=timeout, allow_redirects=True)
                    if response.status_code == 200:
                        successful.append((url, article.title))
                        click.echo(f"{url}")
                    else:
                        failed.append((url, article.title, response.status_code))
                        click.echo(f"{url} (HTTP {response.status_code})")
                except requests.exceptions.RequestException as e:
                    failed.append((url, article.title, str(e)))
                    click.echo(f"{url} (Error: {type(e).__name__})")

            click.echo("\n" + "=" * 70)
            click.echo("Verification Summary")
            click.echo("=" * 70)
            click.echo(f"Total checked: {len(urls_to_check)}")
            click.echo(f"Successful: {len(successful)}")
            click.echo(f"Failed: {len(failed)}")

            if failed:
                click.echo("\nFailed URLs:")
                for url, title, error in failed:
                    title_preview = title[:50] + "..." if len(title) > 50 else title
                    click.echo(f" {url}")
                    click.echo(f" Title: {title_preview}")
                    click.echo(f" Error: {error}")

            click.echo("=" * 70)

            # Abort so that failed URLs are visible to the caller.
            if failed:
                raise click.Abort()

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; without this clause the abort
        # raised above for failed URLs would be reported a second time as an
        # empty "Error verifying deployment: " line.
        raise
    except Exception as e:
        click.echo(f"Error verifying deployment: {e}", err=True)
        raise click.Abort()
@app.command("get-links")
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to get links for')
@click.option('--tier', '-t', required=True, help='Tier to filter (e.g., "1" or "2+" for tier 2 and above)')
@click.option('--with-anchor-text', is_flag=True, help='Include anchor text used for tiered links')
@click.option('--with-destination-url', is_flag=True, help='Include destination URL that the article links to')
def get_links(project_id: int, tier: str, with_anchor_text: bool, with_destination_url: bool):
    """Export article URLs with optional link details for a project and tier"""
    # Click renders the docstring above as --help text, so notes live here.
    #
    # Writes CSV to stdout with the columns article_url, tier, title, plus
    # anchor_text and/or destination_url when the matching flag is set. With
    # either flag set, an article produces one row per link of type 'tiered',
    # or a single row with empty extra columns when it has none.
    #
    # tier is either an exact tier number ("1") or a lower bound ("2+").
    #
    # The command aborts on an unknown project, an unparsable tier, no
    # matching deployed articles, or any unexpected error.
    import csv
    import sys
    from src.database.repositories import ArticleLinkRepository

    try:
        session = db_manager.get_session()
        try:
            content_repo = GeneratedContentRepository(session)
            project_repo = ProjectRepository(session)
            link_repo = ArticleLinkRepository(session)

            project = project_repo.get_by_id(project_id)
            if not project:
                click.echo(f"Error: Project {project_id} not found", err=True)
                raise click.Abort()

            # A trailing '+' selects "this tier and above"; otherwise the match is exact.
            tier_range_mode = tier.endswith('+')
            tier_number = tier[:-1] if tier_range_mode else tier
            try:
                min_tier = int(tier_number)
            except ValueError:
                click.echo(f"Error: Invalid tier format '{tier}'. Use '1', '2', or '2+'", err=True)
                raise click.Abort()

            all_articles = content_repo.get_by_project_id(project_id)
            if tier_range_mode:
                # The numeric part is recovered by stripping the 'tier' prefix.
                articles = [a for a in all_articles if a.deployed_url and int(a.tier.replace('tier', '')) >= min_tier]
            else:
                tier_str = f"tier{min_tier}"
                articles = [a for a in all_articles if a.deployed_url and a.tier == tier_str]

            if not articles:
                click.echo(f"No deployed articles found for project {project_id} with tier filter '{tier}'", err=True)
                raise click.Abort()

            csv_writer = csv.writer(sys.stdout)
            header = ['article_url', 'tier', 'title']
            if with_anchor_text:
                header.append('anchor_text')
            if with_destination_url:
                header.append('destination_url')
            csv_writer.writerow(header)

            for article in articles:
                row = [article.deployed_url, article.tier, article.title]

                if not (with_anchor_text or with_destination_url):
                    csv_writer.writerow(row)
                    continue

                tiered_links = [
                    link for link in link_repo.get_by_source_article(article.id)
                    if link.link_type == 'tiered'
                ]

                if not tiered_links:
                    # Keep the column count stable for articles without tiered links.
                    if with_anchor_text:
                        row.append('')
                    if with_destination_url:
                        row.append('')
                    csv_writer.writerow(row)
                    continue

                for link in tiered_links:
                    row_with_link = row.copy()
                    if with_anchor_text:
                        row_with_link.append(link.anchor_text or '')
                    if with_destination_url:
                        if link.to_url:
                            row_with_link.append(link.to_url)
                        elif link.to_content_id:
                            # The link points at another article; use its deployed URL.
                            target_article = content_repo.get_by_id(link.to_content_id)
                            row_with_link.append(target_article.deployed_url if target_article and target_article.deployed_url else '')
                        else:
                            row_with_link.append('')
                    csv_writer.writerow(row_with_link)

        finally:
            session.close()

    except click.Abort:
        # click.Abort subclasses RuntimeError; let deliberate aborts through so
        # the generic handler does not print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error getting links: {e}", err=True)
        raise click.Abort()
# Script entry point: run the Click command group only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    app()