"""
CLI command definitions using Click
"""
|
|
import random
|
|
import click
|
|
from typing import Optional, List
|
|
from src.core.config import get_config, get_bunny_account_api_key, get_concurrent_workers
|
|
from src.auth.service import AuthService
|
|
from src.database.session import db_manager
|
|
from src.database.repositories import UserRepository, SiteDeploymentRepository, ProjectRepository
|
|
from src.database.models import User
|
|
from src.interlinking.anchor_text_generator import AnchorTextGenerator
|
|
from src.deployment.bunnynet import (
|
|
BunnyNetClient,
|
|
BunnyNetAPIError,
|
|
BunnyNetAuthError,
|
|
BunnyNetResourceConflictError
|
|
)
|
|
from src.ingestion.parser import CORAParser, CORAParseError, SimpleSpreadsheetParser
|
|
from src.generation.ai_client import AIClient, PromptManager
|
|
from src.generation.service import ContentGenerator
|
|
from src.generation.batch_processor import BatchProcessor
|
|
from src.database.repositories import GeneratedContentRepository, SitePageRepository
|
|
from src.deployment.bunny_storage import BunnyStorageError
|
|
from src.deployment.deployment_service import DeploymentService
|
|
from src.deployment.url_logger import URLLogger
|
|
from src.templating.service import TemplateService
|
|
from dotenv import load_dotenv
|
|
import os
|
|
import requests
|
|
import random
|
|
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
# Load .env file at module level
|
|
load_dotenv()
|
|
|
|
|
|
def _get_brands_for_url(url: str) -> List[str]:
    """
    Look up brand names for a given URL from brands.json

    The host part of the URL is extracted, a leading "www." is dropped, and
    the result is used as a key into the JSON object stored in
    ``brands.json`` (resolved relative to the current working directory).

    Args:
        url: Money site URL (e.g., "https://www.gullco.com"). A URL without
            a scheme (e.g., "www.gullco.com") is accepted as well.

    Returns:
        List of brand names, or empty list if not found or file missing
    """
    from urllib.parse import urlparse

    if not isinstance(url, str) or not url.strip():
        return []

    candidate = url.strip()
    try:
        domain = urlparse(candidate).netloc
        if not domain:
            # Without a scheme urlparse() leaves netloc empty and puts the
            # host into the path; a "//" prefix makes it parse as a host.
            domain = urlparse("//" + candidate).netloc
    except ValueError:
        return []

    # Remove www. prefix if present (host names are case-insensitive)
    if domain.lower().startswith("www."):
        domain = domain[4:]
    if not domain:
        return []

    # Load brands.json from project root
    brands_file = Path("brands.json")
    if not brands_file.exists():
        return []

    try:
        with open(brands_file, "r", encoding="utf-8") as f:
            brands_data = json.load(f)
    except (OSError, ValueError):
        # Best-effort lookup: an unreadable or malformed file means "no brands".
        return []

    if not isinstance(brands_data, dict):
        return []

    # Exact key first (existing behaviour), then the lower-cased host.
    for key in (domain, domain.lower()):
        if key in brands_data:
            return brands_data[key]
    return []
|
|
|
|
|
|
def _build_tier1_anchor_terms(
    project,
    tier1_branded_ratio: Optional[float],
    tier1_branded_text: Optional[str],
    tier1_branded_plus_ratio: Optional[float],
    brand_names: Optional[List[str]],
) -> List[str]:
    """
    Build the explicit tier1 anchor text list for a project.

    The list is sized from the number of available keyword terms and is
    filled in three steps: branded terms, branded+ terms, regular terms.

    Args:
        project: Project record; its custom_anchor_text and related_searches
            attributes are read here
        tier1_branded_ratio: Share of the slots given to branded terms
        tier1_branded_text: Comma-separated branded anchor texts
        tier1_branded_plus_ratio: Share of the slots left after branded
            terms that is given to branded+ terms
        brand_names: Brand names combined with search terms for branded+

    Returns:
        Anchor terms in the order branded, branded+, regular
    """
    # Use custom anchor text from CORA if available, then related searches,
    # otherwise generate keyword variations
    if project.custom_anchor_text:
        keyword_variations = project.custom_anchor_text
    elif project.related_searches:
        keyword_variations = project.related_searches
    else:
        keyword_variations = AnchorTextGenerator()._generate_from_keyword(project, 10)

    # Use the ACTUAL count of available terms
    actual_count = len(keyword_variations)

    # Parse comma-separated branded anchor texts
    branded_texts = [
        text.strip()
        for text in (tier1_branded_text or "").split(",")
        if text.strip()
    ]

    # Only reserve branded slots when at least one usable branded text was
    # parsed; a value such as "," would otherwise cause a modulo by zero below.
    branded_count = 0
    if tier1_branded_ratio is not None and branded_texts:
        branded_count = int(actual_count * tier1_branded_ratio)
    remaining_count = actual_count - branded_count

    # Start with branded terms, cycling through the branded texts
    anchor_terms = [
        branded_texts[i % len(branded_texts)] for i in range(branded_count)
    ]

    # Generate branded+ terms if enabled
    branded_plus_count = 0
    if tier1_branded_plus_ratio is not None and brand_names:
        branded_plus_count = int(remaining_count * tier1_branded_plus_ratio)

        # Use related_searches from project, or fallback to keyword_variations
        related_searches = project.related_searches if project.related_searches else keyword_variations

        branded_plus_terms = []
        for brand in brand_names:
            for term in related_searches:
                branded_plus_terms.append(f"{brand} {term}")
                branded_plus_terms.append(f"{term} by {brand}")

        # Randomly select the needed number of branded+ terms
        if branded_plus_terms:
            if branded_plus_count > len(branded_plus_terms):
                selected_branded_plus = branded_plus_terms
            else:
                selected_branded_plus = random.sample(branded_plus_terms, branded_plus_count)
            anchor_terms.extend(selected_branded_plus)

    # Regular terms fill whatever is left after branded and branded+
    regular_count = remaining_count - branded_plus_count
    if regular_count > 0:
        # Randomize keyword selection if we're not using all available terms
        if regular_count < len(keyword_variations):
            selected_keywords = random.sample(keyword_variations, regular_count)
        else:
            selected_keywords = keyword_variations[:regular_count]
        anchor_terms.extend(selected_keywords)

    return anchor_terms


def create_job_file_for_project(
    project_id: int,
    project_name: str,
    session,
    tier1_branded_ratio: Optional[float] = None,
    tier1_branded_text: Optional[str] = None,
    tier1_branded_plus_ratio: Optional[float] = None,
    brand_names: Optional[List[str]] = None,
    random_deployment_targets: Optional[int] = None,
    tier1_count: Optional[int] = None,
) -> Optional[str]:
    """
    Create a job JSON file for a newly created project.

    The file is written to the "jobs" directory (created if missing) and is
    named after the sanitized project name; if that file already exists a
    YYMMDD date suffix is appended to the name.

    Args:
        project_id: The ID of the created project
        project_name: The name of the project (for filename)
        session: Database session
        tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0)
        tier1_branded_text: Optional branded anchor text (company name) for tier1;
            several texts may be given comma-separated
        tier1_branded_plus_ratio: Optional ratio of branded+ anchor text for tier1
            (0.0-1.0, applied to remaining slots after branded)
        brand_names: Optional list of brand names for branded+ generation
        random_deployment_targets: Optional number of random deployment targets
            to select (default: random 2-3); capped at the number of available domains
        tier1_count: Optional number of tier1 articles (default: random 10-12)

    Returns:
        Path to created file, or None if creation failed
    """
    try:
        deployment_repo = SiteDeploymentRepository(session)
        sites = deployment_repo.get_all()

        available_domains = [
            site.custom_hostname
            for site in sites
            if site.custom_hostname is not None
        ]

        if not available_domains:
            click.echo("Warning: No domains with custom hostnames found. Job file not created.", err=True)
            return None

        t1_count = tier1_count if tier1_count is not None else random.randint(10, 12)
        t2_count = random.randint(30, 45)
        if random_deployment_targets is not None:
            num_targets = min(random_deployment_targets, len(available_domains))
        else:
            num_targets = min(random.randint(2, 3), len(available_domains))
        selected_domains = random.sample(available_domains, num_targets)

        # Keep alphanumerics, '-' and '_'; every other character (including
        # whitespace) becomes '-'.
        sanitized_name = "".join(
            c if c.isalnum() or c in ('-', '_') else '-' for c in project_name.lower()
        ).strip('-')
        if not sanitized_name:
            # A name made only of punctuation would otherwise give "jobs/.json"
            sanitized_name = f"project-{project_id}"

        jobs_dir = Path("jobs")
        jobs_dir.mkdir(exist_ok=True)

        filepath = jobs_dir / f"{sanitized_name}.json"
        if filepath.exists():
            date_suffix = datetime.now().strftime("%y%m%d")
            filepath = jobs_dir / f"{sanitized_name}-{date_suffix}.json"

        # Build tier1 configuration
        tier1_config = {
            "count": t1_count,
            "min_word_count": 1250,
            "max_word_count": 2000,
            "models": {
                "title": "openai/gpt-4o-mini",
                "outline": "openai/gpt-4o-mini",
                "content": "x-ai/grok-4-fast"
            }
        }

        # Add anchor_text_config if branded ratio/text or branded+ ratio is provided
        wants_branded = tier1_branded_ratio is not None and tier1_branded_text
        wants_branded_plus = tier1_branded_plus_ratio is not None and brand_names
        if wants_branded or wants_branded_plus:
            # Get project to retrieve main_keyword for non-branded terms
            project = ProjectRepository(session).get_by_id(project_id)
            if project and project.main_keyword:
                tier1_config["anchor_text_config"] = {
                    "mode": "explicit",
                    "terms": _build_tier1_anchor_terms(
                        project,
                        tier1_branded_ratio,
                        tier1_branded_text,
                        tier1_branded_plus_ratio,
                        brand_names,
                    )
                }

        job_template = {
            "jobs": [
                {
                    "project_id": project_id,
                    "deployment_targets": selected_domains,
                    "tiers": {
                        "tier1": tier1_config,
                        "tier2": {
                            "count": t2_count,
                            "min_word_count": 1000,
                            "max_word_count": 1250,
                            "models": {
                                "title": "openai/gpt-4o-mini",
                                "outline": "openai/gpt-4o-mini",
                                "content": "openai/gpt-4o-mini"
                            },
                            "interlinking": {
                                "links_per_article_min": 3,
                                "links_per_article_max": 6
                            }
                        }
                    }
                }
            ]
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(job_template, f, indent=2)

        return str(filepath)

    except Exception as e:
        # Job file creation is best-effort: report the problem and let the
        # caller continue without a job file.
        click.echo(f"Warning: Failed to create job file: {e}", err=True)
        return None
|
|
|
|
|
|
def authenticate_admin(username: str, password: str) -> Optional[User]:
    """
    Check a username/password pair and require the admin role

    A database session is opened for the lookup and is always closed again
    before returning.

    Args:
        username: The username to authenticate
        password: The password to authenticate

    Returns:
        The authenticated User when it is an admin, otherwise None
    """
    db_session = db_manager.get_session()
    try:
        service = AuthService(UserRepository(db_session))
        candidate = service.authenticate_user(username, password)
        # Both a failed login and a non-admin account yield None
        return candidate if candidate and candidate.is_admin() else None
    finally:
        db_session.close()
|
|
|
|
|
|
def prompt_admin_credentials() -> tuple[str, str]:
    """
    Obtain the admin username and password

    The environment variables CLIENT_USERNAME and CLIENT_PASSWORD are read
    first; only the values missing from the environment are prompted for.

    Returns:
        Tuple of (username, password)
    """
    env_user = os.getenv("CLIENT_USERNAME")
    env_pass = os.getenv("CLIENT_PASSWORD")

    if not (env_user and env_pass):
        click.echo("Admin authentication required")
        if not env_user:
            env_user = click.prompt("Username", type=str)
        if not env_pass:
            # hide_input keeps the typed password off the terminal
            env_pass = click.prompt("Password", type=str, hide_input=True)

    return env_user, env_pass
|
|
|
|
|
|
# Root command group; every sub-command below registers itself through
# @app.command(). The docstring doubles as the CLI help text.
@click.group()
@click.version_option(version="1.0.0")
def app():
    """Content Automation & Syndication Platform CLI"""
|
|
|
|
|
|
@app.command()
def config():
    """Show current configuration"""
    # Prints a short summary of the loaded configuration. Any failure is
    # reported on stderr; the command does not abort in that case.
    try:
        settings = get_config()
        click.echo("Current Configuration:")
        application = settings.application
        click.echo(f"Application: {application.name} v{application.version}")
        click.echo(f"Environment: {application.environment}")
        click.echo(f"Database: {settings.database.url}")
        click.echo(f"AI Model: {settings.ai_service.model}")
        click.echo(f"Log Level: {settings.logging.level}")
    except Exception as e:
        click.echo(f"Error loading configuration: {e}", err=True)
|
|
|
|
|
|
@app.command()
def health():
    """Check system health"""
    # The only check performed is that the configuration can be loaded;
    # a failure is reported on stderr and aborts the command.
    try:
        get_config()
        for status_line in (
            "[OK] Configuration loaded successfully",
            "[OK] System is healthy",
        ):
            click.echo(status_line)
    except Exception as e:
        click.echo(f"[ERROR] System health check failed: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command()
def models():
    """List available AI models"""
    # Shows the configured model, provider and base URL, followed by every
    # entry of ai_service.available_models; the active one is marked.
    try:
        settings = get_config()
        click.echo("Available AI Models:")
        ai = settings.ai_service
        click.echo(f"Current: {ai.model}")
        click.echo(f"Provider: {ai.provider}")
        click.echo(f"Base URL: {ai.base_url}")
        click.echo("\nAvailable models:")
        for label, identifier in ai.available_models.items():
            marker = " (current)" if identifier == ai.model else ""
            click.echo(f" {label}: {identifier}{marker}")
    except Exception as e:
        click.echo(f"Error listing models: {e}", err=True)
|
|
|
|
|
|
@app.command("add-user")
@click.option("--username", prompt=True, help="Username for the new user")
@click.option("--password", prompt=True, hide_input=True,
              confirmation_prompt=True, help="Password for the new user")
@click.option("--role", type=click.Choice(["Admin", "User"], case_sensitive=True),
              prompt=True, help="Role for the new user")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def add_user(username: str, password: str, role: str,
             admin_user: Optional[str], admin_password: Optional[str]):
    """Create a new user (requires admin authentication)"""
    # Flow: authenticate the calling admin, then create the user through
    # AuthService. Every failure is reported on stderr and ends in click.Abort.
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Create the new user
        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)

            new_user = auth_service.create_user_with_hashed_password(
                username=username,
                password=password,
                role=role
            )

            click.echo(f"Success: User '{new_user.username}' created with role '{new_user.role}'")
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except ValueError as e:
        click.echo(f"Error: {e}", err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error creating user: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("delete-user")
@click.option("--username", prompt=True, help="Username to delete")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.confirmation_option(prompt="Are you sure you want to delete this user?")
def delete_user(username: str, admin_user: Optional[str],
                admin_password: Optional[str]):
    """Delete a user by username (requires admin authentication)"""
    # Flow: authenticate the calling admin, refuse self-deletion, look the
    # user up and delete it. Every failure is reported on stderr and ends
    # in click.Abort.
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Prevent admin from deleting themselves
        if admin.username == username:
            click.echo("Error: Cannot delete your own account", err=True)
            raise click.Abort()

        # Delete the user
        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)

            # Check if user exists
            user_to_delete = user_repo.get_by_username(username)
            if not user_to_delete:
                click.echo(f"Error: User '{username}' not found", err=True)
                raise click.Abort()

            # Delete the user
            success = user_repo.delete(user_to_delete.id)
            if success:
                click.echo(f"Success: User '{username}' has been deleted")
            else:
                click.echo(f"Error: Failed to delete user '{username}'", err=True)
                raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error deleting user: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("list-users")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def list_users(admin_user: Optional[str], admin_password: Optional[str]):
    """List all users (requires admin authentication)"""
    # Flow: authenticate the calling admin, then print one table row per
    # user returned by UserRepository.get_all().
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # List all users
        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            users = user_repo.get_all()

            if not users:
                click.echo("No users found")
                return

            click.echo(f"\nTotal users: {len(users)}")
            click.echo("-" * 60)
            click.echo(f"{'ID':<5} {'Username':<20} {'Role':<10} {'Created'}")
            click.echo("-" * 60)

            for user in users:
                created = user.created_at.strftime("%Y-%m-%d %H:%M:%S")
                click.echo(f"{user.id:<5} {user.username:<20} {user.role:<10} {created}")

            click.echo("-" * 60)
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error listing users: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("provision-site")
@click.option("--name", prompt=True, help="Site name")
@click.option("--domain", prompt=True, help="Custom domain (FQDN, e.g., www.example.com)")
@click.option("--storage-name", prompt=True, help="Storage Zone name (must be globally unique)")
@click.option("--region", prompt=True, type=click.Choice(["DE", "NY", "LA", "SG", "SYD"]),
              help="Storage region")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def provision_site(name: str, domain: str, storage_name: str, region: str,
                   admin_user: Optional[str], admin_password: Optional[str]):
    """Provision a new site with Storage Zone and Pull Zone (requires admin)"""
    # Flow: authenticate the admin, read the bunny.net API key, create a
    # Storage Zone, a Pull Zone ("<storage-name>-cdn") and the custom
    # hostname, store the deployment record, pick a random template and
    # print the DNS record the operator has to create manually.
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get bunny.net API key
        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo(f"\nProvisioning site '{name}' with domain '{domain}'...")

        # Initialize bunny.net client
        client = BunnyNetClient(api_key)
        session = db_manager.get_session()

        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Check if domain already exists
            if deployment_repo.exists(domain):
                click.echo(f"Error: Site with domain '{domain}' already exists", err=True)
                raise click.Abort()

            # Step 1: Create Storage Zone
            click.echo(f"Step 1/3: Creating Storage Zone '{storage_name}' in region {region}...")
            storage_result = client.create_storage_zone(storage_name, region)
            click.echo(f" Storage Zone created: ID={storage_result.id}")

            # Step 2: Create Pull Zone
            pull_zone_name = f"{storage_name}-cdn"
            click.echo(f"Step 2/3: Creating Pull Zone '{pull_zone_name}'...")
            pull_result = client.create_pull_zone(pull_zone_name, storage_result.id)
            click.echo(f" Pull Zone created: ID={pull_result.id}, Hostname={pull_result.hostname}")

            # Step 3: Add Custom Hostname
            click.echo(f"Step 3/3: Adding custom hostname '{domain}'...")
            client.add_custom_hostname(pull_result.id, domain)
            click.echo(" Custom hostname added successfully")

            # Save to database
            deployment = deployment_repo.create(
                site_name=name,
                custom_hostname=domain,
                storage_zone_id=storage_result.id,
                storage_zone_name=storage_result.name,
                storage_zone_password=storage_result.password,
                storage_zone_region=storage_result.region,
                pull_zone_id=pull_result.id,
                pull_zone_bcdn_hostname=pull_result.hostname
            )

            # Randomly assign template
            template_service = TemplateService()
            available_templates = template_service.get_available_templates()
            if available_templates:
                deployment.template_name = random.choice(available_templates)
                session.commit()
                session.refresh(deployment)
                click.echo(f" Template assigned: {deployment.template_name}")

            click.echo("\n" + "=" * 70)
            click.echo("Site provisioned successfully!")
            click.echo("=" * 70)
            click.echo("\nMANUAL DNS CONFIGURATION REQUIRED:")
            click.echo("You must create the following CNAME record with your domain registrar:\n")
            click.echo(" Type: CNAME")
            # The first DNS label is used as the record host.
            # NOTE(review): for an apex domain such as "example.com" this
            # yields "example" rather than "@" - confirm intended behaviour.
            subdomain = domain.split('.')[0] if '.' in domain else '@'
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nExample DNS configuration:")
            click.echo(" Type: CNAME")
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nNote: DNS propagation may take up to 48 hours.")
            click.echo("=" * 70)

        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetResourceConflictError as e:
            click.echo(f"Error: Resource conflict - {e}", err=True)
            click.echo("Storage Zone or Pull Zone name already exists. Try a different name.", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error provisioning site: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("attach-domain")
@click.option("--name", prompt=True, help="Site name")
@click.option("--domain", prompt=True, help="Custom domain (FQDN, e.g., www.example.com)")
@click.option("--storage-name", prompt=True, help="Existing Storage Zone name")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def attach_domain(name: str, domain: str, storage_name: str,
                  admin_user: Optional[str], admin_password: Optional[str]):
    """Attach a domain to an existing Storage Zone (requires admin)"""
    # Flow: authenticate the admin, read the bunny.net API key, look up the
    # existing Storage Zone, create a Pull Zone named
    # "<storage-name>-<domain with dots replaced by dashes>", add the custom
    # hostname, store the deployment record, pick a random template and
    # print the DNS record the operator has to create manually.
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get bunny.net API key
        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo(f"\nAttaching domain '{domain}' to existing Storage Zone '{storage_name}'...")

        # Initialize bunny.net client
        client = BunnyNetClient(api_key)
        session = db_manager.get_session()

        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Check if domain already exists
            if deployment_repo.exists(domain):
                click.echo(f"Error: Site with domain '{domain}' already exists", err=True)
                raise click.Abort()

            # Step 1: Find existing Storage Zone
            click.echo(f"Step 1/3: Finding Storage Zone '{storage_name}'...")
            storage_result = client.find_storage_zone_by_name(storage_name)
            if not storage_result:
                click.echo(f"Error: Storage Zone '{storage_name}' not found", err=True)
                raise click.Abort()
            click.echo(f" Storage Zone found: ID={storage_result.id}")

            # Step 2: Create Pull Zone
            pull_zone_name = f"{storage_name}-{domain.replace('.', '-')}"
            click.echo(f"Step 2/3: Creating Pull Zone '{pull_zone_name}'...")
            pull_result = client.create_pull_zone(pull_zone_name, storage_result.id)
            click.echo(f" Pull Zone created: ID={pull_result.id}, Hostname={pull_result.hostname}")

            # Step 3: Add Custom Hostname
            click.echo(f"Step 3/3: Adding custom hostname '{domain}'...")
            client.add_custom_hostname(pull_result.id, domain)
            click.echo(" Custom hostname added successfully")

            # Save to database
            deployment = deployment_repo.create(
                site_name=name,
                custom_hostname=domain,
                storage_zone_id=storage_result.id,
                storage_zone_name=storage_result.name,
                storage_zone_password=storage_result.password,
                storage_zone_region=storage_result.region,
                pull_zone_id=pull_result.id,
                pull_zone_bcdn_hostname=pull_result.hostname
            )

            # Randomly assign template
            template_service = TemplateService()
            available_templates = template_service.get_available_templates()
            if available_templates:
                deployment.template_name = random.choice(available_templates)
                session.commit()
                session.refresh(deployment)
                click.echo(f" Template assigned: {deployment.template_name}")

            click.echo("\n" + "=" * 70)
            click.echo("Domain attached successfully!")
            click.echo("=" * 70)
            click.echo("\nMANUAL DNS CONFIGURATION REQUIRED:")
            click.echo("You must create the following CNAME record with your domain registrar:\n")
            click.echo(" Type: CNAME")
            # The first DNS label is used as the record host.
            # NOTE(review): for an apex domain such as "example.com" this
            # yields "example" rather than "@" - confirm intended behaviour.
            subdomain = domain.split('.')[0] if '.' in domain else '@'
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nExample DNS configuration:")
            click.echo(" Type: CNAME")
            click.echo(f" Host: {subdomain}")
            click.echo(f" Value: {pull_result.hostname}")
            click.echo("\nNote: DNS propagation may take up to 48 hours.")
            click.echo("=" * 70)

        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetResourceConflictError as e:
            click.echo(f"Error: Resource conflict - {e}", err=True)
            click.echo("Pull Zone name already exists. Try a different domain.", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error attaching domain: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("list-sites")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def list_sites(admin_user: Optional[str], admin_password: Optional[str]):
    """List all site deployments (requires admin)"""
    # Flow: authenticate the calling admin, then print one table row per
    # record returned by SiteDeploymentRepository.get_all().
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # List all sites
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)
            sites = deployment_repo.get_all()

            if not sites:
                click.echo("No site deployments found")
                return

            click.echo(f"\nTotal sites: {len(sites)}")
            click.echo("-" * 100)
            click.echo(f"{'ID':<5} {'Site Name':<25} {'Custom Domain':<30} {'Storage Zone':<20} {'Region':<8}")
            click.echo("-" * 100)

            for site in sites:
                # Sites without a custom hostname are shown as N/A
                click.echo(f"{site.id:<5} {site.site_name:<25} {site.custom_hostname or 'N/A':<30} "
                           f"{site.storage_zone_name:<20} {site.storage_zone_region:<8}")

            click.echo("-" * 100)
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error listing sites: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("discover-s3-buckets")
def discover_s3_buckets():
    """Discover and register AWS S3 buckets as site deployments"""
    # Runs scripts/discover_s3_buckets.py (located three directory levels
    # above this file) with the current Python interpreter and aborts when
    # the script is missing or exits with a non-zero code.
    try:
        # Import here to avoid circular dependencies
        import subprocess
        import sys
        from pathlib import Path

        # Get the script path
        script_dir = Path(__file__).parent.parent.parent
        script_path = script_dir / "scripts" / "discover_s3_buckets.py"

        if not script_path.exists():
            click.echo(f"Error: Discovery script not found at {script_path}", err=True)
            raise click.Abort()

        # Run the discovery script; its output goes straight to the terminal
        click.echo("Running S3 bucket discovery script...\n")
        result = subprocess.run([sys.executable, str(script_path)], check=False)

        if result.returncode != 0:
            click.echo(f"\nDiscovery script exited with code {result.returncode}", err=True)
            raise click.Abort()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except FileNotFoundError:
        click.echo("Error: Discovery script not found", err=True)
        raise click.Abort()
    except Exception as e:
        click.echo(f"Error running discovery script: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("get-site")
@click.option("--domain", prompt=True, help="Custom domain to lookup")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
def get_site(domain: str, admin_user: Optional[str], admin_password: Optional[str]):
    """Get detailed information about a site deployment (requires admin)"""
    # Flow: authenticate the calling admin, look the deployment up by its
    # custom hostname and print all stored fields.
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Get site details
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)
            site = deployment_repo.get_by_hostname(domain)

            if not site:
                click.echo(f"Error: Site with domain '{domain}' not found", err=True)
                raise click.Abort()

            click.echo("\n" + "=" * 70)
            click.echo("Site Deployment Details")
            click.echo("=" * 70)
            click.echo(f"ID: {site.id}")
            click.echo(f"Site Name: {site.site_name}")
            click.echo(f"Custom Domain: {site.custom_hostname}")
            click.echo("\nStorage Zone:")
            click.echo(f" ID: {site.storage_zone_id}")
            click.echo(f" Name: {site.storage_zone_name}")
            click.echo(f" Region: {site.storage_zone_region}")
            # NOTE(review): the storage zone password is printed in clear
            # text; confirm this is acceptable for terminal/CI log output.
            click.echo(f" Password: {site.storage_zone_password}")
            click.echo("\nPull Zone:")
            click.echo(f" ID: {site.pull_zone_id}")
            click.echo(f" b-cdn Hostname: {site.pull_zone_bcdn_hostname}")
            click.echo("\nTimestamps:")
            click.echo(f" Created: {site.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
            click.echo(f" Updated: {site.updated_at.strftime('%Y-%m-%d %H:%M:%S')}")
            click.echo("=" * 70)
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error getting site details: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("remove-site")
@click.option("--domain", prompt=True, help="Custom domain to remove")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.confirmation_option(prompt="Are you sure you want to remove this site deployment record?")
def remove_site(domain: str, admin_user: Optional[str], admin_password: Optional[str]):
    """Remove a site deployment record (requires admin)"""
    # Flow: authenticate the calling admin, look the deployment up by its
    # custom hostname and delete the database record only; the bunny.net
    # resources are left untouched (see the note printed on success).
    try:
        # Authenticate admin
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        # Remove site
        session = db_manager.get_session()
        try:
            deployment_repo = SiteDeploymentRepository(session)

            # Check if site exists
            site = deployment_repo.get_by_hostname(domain)
            if not site:
                click.echo(f"Error: Site with domain '{domain}' not found", err=True)
                raise click.Abort()

            # Delete the site
            success = deployment_repo.delete(site.id)
            if success:
                click.echo(f"Success: Site deployment record for '{domain}' has been removed")
                click.echo("\nNote: This does NOT delete resources from bunny.net.")
                click.echo("You must manually delete the Storage Zone and Pull Zone if needed.")
            else:
                click.echo(f"Error: Failed to remove site '{domain}'", err=True)
                raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError; without this clause the
        # generic handler below would print a second, empty error message.
        raise
    except Exception as e:
        click.echo(f"Error removing site: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("sync-sites")
@click.option("--admin-user", help="Admin username for authentication")
@click.option("--admin-password", help="Admin password for authentication")
@click.option("--dry-run", is_flag=True, help="Show what would be imported without making changes")
def sync_sites(admin_user: Optional[str], admin_password: Optional[str], dry_run: bool):
    """Sync existing bunny.net sites with custom domains to database (requires admin)"""
    # Flow:
    #   1. Authenticate as admin and read the bunny.net account API key.
    #   2. Fetch all Storage Zones and Pull Zones.
    #   3. For every Pull Zone backed by a known Storage Zone, create one site
    #      deployment record per custom hostname, or a single b-cdn-only record
    #      when the zone has no custom hostname.
    #   4. Print imported / skipped / error counts.
    #
    # Args:
    #   admin_user / admin_password: admin credentials; prompted for when either
    #     one is missing.
    #   dry_run: report what would be imported without writing to the database.
    # Raises:
    #   click.Abort: on failed authentication, missing API key, bunny.net API
    #     errors, or any unexpected error. Per-site import failures are counted
    #     and reported but do not abort the run.
    try:
        if not admin_user or not admin_password:
            admin_user, admin_password = prompt_admin_credentials()

        admin = authenticate_admin(admin_user, admin_password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        try:
            api_key = get_bunny_account_api_key()
        except ValueError as e:
            click.echo(f"Error: {e}", err=True)
            click.echo("Please set BUNNY_ACCOUNT_API_KEY in your .env file", err=True)
            raise click.Abort()

        click.echo("\nSyncing sites from bunny.net...")
        if dry_run:
            click.echo("DRY RUN MODE - No changes will be made\n")

        client = BunnyNetClient(api_key)
        session = db_manager.get_session()

        try:
            deployment_repo = SiteDeploymentRepository(session)

            # The storage zone listing is the source of the zone password and
            # region stored on each imported record.
            click.echo("Fetching Storage Zones from bunny.net...")
            storage_zones = client.get_storage_zones()
            storage_zone_map = {zone["Id"]: zone for zone in storage_zones}
            click.echo(f" Found {len(storage_zones)} Storage Zones")

            click.echo("Fetching Pull Zones from bunny.net...")
            pull_zones = client.get_pull_zones()
            click.echo(f" Found {len(pull_zones)} Pull Zones")

            imported = 0
            skipped = 0
            errors = 0

            click.echo("\nProcessing Pull Zones with custom domains...")
            click.echo("=" * 80)

            for pz in pull_zones:
                # Only pull zones backed by a storage zone we know about are sites.
                if not pz.get("StorageZoneId"):
                    continue

                storage_zone = storage_zone_map.get(pz["StorageZoneId"])
                if not storage_zone:
                    continue

                # The list endpoint is not relied on for hostnames; fetch details.
                pz_details = client.get_pull_zone(pz["Id"])
                if not pz_details:
                    continue

                # `or []` guards against an explicit null in the API payload.
                hostnames = pz_details.get("Hostnames", []) or []

                # First *.b-cdn.net hostname, falling back to the conventional name.
                default_hostname = next(
                    (h["Value"] for h in hostnames if h.get("Value") and h["Value"].endswith(".b-cdn.net")),
                    f"{pz['Name']}.b-cdn.net"
                )

                custom_hostnames = [
                    h["Value"] for h in hostnames
                    if h.get("Value") and not h["Value"].endswith(".b-cdn.net")
                ]

                # One record per custom domain; a single b-cdn-only record otherwise.
                sites_to_import = (
                    [(ch, default_hostname) for ch in custom_hostnames]
                    or [(None, default_hostname)]
                )

                for custom_hostname, bcdn_hostname in sites_to_import:
                    # Bound before the try so the error handler can always name it.
                    check_hostname = custom_hostname or bcdn_hostname
                    try:
                        if deployment_repo.exists(check_hostname):
                            click.echo(f"SKIP: {check_hostname} (already in database)")
                            skipped += 1
                            continue

                        if dry_run:
                            click.echo(f"WOULD IMPORT: {check_hostname}")
                            click.echo(f" Storage Zone: {storage_zone['Name']} (Region: {storage_zone.get('Region', 'Unknown')})")
                            click.echo(f" Pull Zone: {pz['Name']} (ID: {pz['Id']})")
                            click.echo(f" b-cdn Hostname: {bcdn_hostname}")
                            if custom_hostname:
                                click.echo(f" Custom Domain: {custom_hostname}")
                            imported += 1
                        else:
                            deployment = deployment_repo.create(
                                site_name=storage_zone['Name'],
                                storage_zone_id=storage_zone['Id'],
                                storage_zone_name=storage_zone['Name'],
                                storage_zone_password=storage_zone.get('Password', ''),
                                storage_zone_region=storage_zone.get('Region', ''),
                                pull_zone_id=pz['Id'],
                                pull_zone_bcdn_hostname=bcdn_hostname,
                                custom_hostname=custom_hostname
                            )

                            # Randomly assign a template when any are available.
                            template_service = TemplateService()
                            available_templates = template_service.get_available_templates()
                            if available_templates:
                                deployment.template_name = random.choice(available_templates)
                                session.commit()
                                session.refresh(deployment)

                            click.echo(f"IMPORTED: {check_hostname}")
                            click.echo(f" Storage Zone: {storage_zone['Name']} (Region: {storage_zone.get('Region', 'Unknown')})")
                            click.echo(f" Pull Zone: {pz['Name']} (ID: {pz['Id']})")
                            if custom_hostname:
                                click.echo(f" Custom Domain: {custom_hostname}")
                            click.echo(f" Template: {deployment.template_name}")
                            imported += 1

                    except Exception as e:
                        click.echo(f"ERROR importing {check_hostname}: {e}", err=True)
                        errors += 1
                        # Discard the failed transaction so the remaining sites
                        # can still be imported with this session.
                        # NOTE(review): assumes an SQLAlchemy-style session
                        # (commit/refresh are used above) - confirm.
                        session.rollback()

            click.echo("=" * 80)
            click.echo(f"\nSync Summary:")
            click.echo(f" Imported: {imported}")
            click.echo(f" Skipped (already exists): {skipped}")
            click.echo(f" Errors: {errors}")

            if dry_run:
                click.echo("\nDRY RUN complete - no changes were made")
                click.echo("Run without --dry-run to import these sites")

        except BunnyNetAuthError as e:
            click.echo(f"Error: Authentication failed - {e}", err=True)
            click.echo("Please check your BUNNY_ACCOUNT_API_KEY", err=True)
            raise click.Abort()
        except BunnyNetAPIError as e:
            click.echo(f"Error: bunny.net API error - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error syncing sites: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command()
@click.option('--file', '-f', 'file_path', required=True, type=click.Path(exists=True), help='Path to CORA .xlsx file')
@click.option('--name', '-n', required=True, help='Project name')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--tier1-branded-plus-ratio', '-bp', default=None, type=float, help='Ratio of branded+ anchor text for tier1 (optional, applied to remaining slots after branded)')
@click.option('--random-deployment-targets', '-r', type=int, help='Number of random deployment targets to select (default: random 2-3)')
@click.option('--tier1-count', type=int, help='Number of tier1 articles (default: random 10-12)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], tier1_branded_ratio: Optional[float], tier1_branded_plus_ratio: Optional[float], random_deployment_targets: Optional[int], tier1_count: Optional[int], username: Optional[str], password: Optional[str]):
    """Ingest a CORA .xlsx report and create a new project"""
    # Flow:
    #   1. Authenticate the user (credentials are prompted for when missing).
    #   2. Parse the CORA report, optionally with custom anchor text.
    #   3. Obtain and validate the money site URL, then create the project.
    #   4. Resolve the optional tier1 branded / branded+ anchor settings.
    #   5. Generate a job file for the new project.
    #
    # Args:
    #   file_path: path to the CORA .xlsx report.
    #   name: project name.
    #   money_site_url: must start with http:// or https://; prompted for when
    #     omitted. A trailing slash is stripped.
    #   custom_anchors: comma-separated anchor texts; blank entries are dropped.
    #   tier1_branded_ratio: when given and > 0, the brand text is prompted for.
    #   tier1_branded_plus_ratio: must be in (0, 1]; otherwise it is ignored.
    #   random_deployment_targets: forwarded to create_job_file_for_project.
    #   tier1_count: NOTE(review): accepted on the command line but never used
    #     in this function - presumably it should be forwarded to
    #     create_job_file_for_project; confirm against that helper's signature.
    # Raises:
    #   click.Abort: on failed authentication, parse errors, invalid URL, or
    #     any unexpected error (after printing a message to stderr).
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)

            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            click.echo(f"Authenticated as: {user.username} ({user.role})")

            click.echo(f"\nParsing CORA file: {file_path}")

            custom_anchor_list = []
            if custom_anchors:
                custom_anchor_list = [anchor.strip() for anchor in custom_anchors.split(',') if anchor.strip()]

            parser = CORAParser(file_path)
            cora_data = parser.parse(custom_anchor_text=custom_anchor_list)

            click.echo(f"Main Keyword: {cora_data['main_keyword']}")
            click.echo(f"Word Count: {cora_data['word_count']}")
            click.echo(f"Entities Found: {len(cora_data['entities'])}")
            click.echo(f"Related Searches: {len(cora_data['related_searches'])}")

            # Prompt for money_site_url if not provided
            if not money_site_url:
                money_site_url = click.prompt(
                    "\nEnter money site URL (required for tiered linking)",
                    type=str
                )

            # Validate money_site_url
            if not money_site_url.startswith('http://') and not money_site_url.startswith('https://'):
                click.echo("Error: Money site URL must start with http:// or https://", err=True)
                raise click.Abort()

            # Clean up URL (remove trailing slash)
            money_site_url = money_site_url.rstrip('/')

            click.echo(f"\nCreating project: {name}")
            click.echo(f"Money Site URL: {money_site_url}")

            # The URL is stored as part of the project data.
            cora_data['money_site_url'] = money_site_url

            project_repo = ProjectRepository(session)
            project = project_repo.create(
                user_id=user.id,
                name=name,
                data=cora_data
            )

            click.echo(f"\nSuccess: Project '{project.name}' created (ID: {project.id})")
            click.echo(f"Main Keyword: {project.main_keyword}")
            click.echo(f"Money Site URL: {project.money_site_url}")
            click.echo(f"Entities: {len(project.entities or [])}")
            click.echo(f"Related Searches: {len(project.related_searches or [])}")

            if project.custom_anchor_text:
                click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}")

            # Handle tier1 branded anchor text if ratio is specified
            tier1_branded_text = None
            brand_names = None
            if tier1_branded_ratio is not None and tier1_branded_ratio > 0:
                # Offer the brands mapped to this URL (if any) as the default answer.
                default_brands = _get_brands_for_url(money_site_url)
                default_prompt = ""
                if default_brands:
                    default_prompt = f" [default: '{', '.join(default_brands)}'] (press Enter for default)"

                tier1_branded_text = click.prompt(
                    f"\nEnter branded anchor text (company name) for tier1 (comma-separated for multiple, e.g., 'AGI Fabricators, AGI'){default_prompt}",
                    type=str,
                    default=""
                ).strip()

                # Use defaults if Enter was pressed and defaults exist
                if not tier1_branded_text and default_brands:
                    tier1_branded_text = ", ".join(default_brands)
                    click.echo(f"Using default brands: {tier1_branded_text}")

                if not tier1_branded_text:
                    # Nothing entered and no defaults: disable branded anchors.
                    click.echo("Warning: Empty branded anchor text provided, skipping tier1 branded anchor text configuration.", err=True)
                    tier1_branded_text = None
                    tier1_branded_ratio = None
                else:
                    # Parse brand names for branded+ generation
                    brand_names = [text.strip() for text in tier1_branded_text.split(',') if text.strip()]

            # Handle branded+ ratio if flag is provided
            if tier1_branded_plus_ratio is not None:
                if tier1_branded_plus_ratio <= 0 or tier1_branded_plus_ratio > 1:
                    click.echo("Warning: Invalid branded+ ratio provided, skipping branded+ configuration.", err=True)
                    tier1_branded_plus_ratio = None
                elif not brand_names:
                    # If brand names weren't set from branded prompt, try to get them from brand lookup
                    default_brands = _get_brands_for_url(money_site_url)
                    if default_brands:
                        brand_names = default_brands
                        click.echo(f"Using brand names from mapping for branded+: {', '.join(brand_names)}")
                    else:
                        click.echo("Warning: No brand names available for branded+ (set --tier1-branded-ratio or add to brands.json). Skipping branded+ configuration.", err=True)
                        tier1_branded_plus_ratio = None

            job_file = create_job_file_for_project(
                project.id,
                project.name,
                session,
                tier1_branded_ratio=tier1_branded_ratio,
                tier1_branded_text=tier1_branded_text,
                tier1_branded_plus_ratio=tier1_branded_plus_ratio,
                brand_names=brand_names,
                random_deployment_targets=random_deployment_targets
            )
            if job_file:
                click.echo(f"Job file created: {job_file}")

        except CORAParseError as e:
            click.echo(f"Error parsing CORA file: {e}", err=True)
            raise click.Abort()
        except ValueError as e:
            click.echo(f"Error creating project: {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error ingesting CORA file: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command()
@click.option('--file', '-f', 'file_path', required=True, type=click.Path(exists=True), help='Path to simple .xlsx spreadsheet file')
@click.option('--name', '-n', help='Project name (overrides project_name from spreadsheet if provided)')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def ingest_simple(file_path: str, name: Optional[str], money_site_url: Optional[str], username: Optional[str], password: Optional[str]):
    """Ingest a simple spreadsheet and create a new project

    Expected spreadsheet format:
    - First row: Headers (main_keyword, project_name, related_searches, entities)
    - Second row: Data values

    Required columns: main_keyword, project_name, related_searches, entities
    - main_keyword: Single phrase keyword
    - project_name: Name for the project
    - related_searches: Comma-delimited list (e.g., "term1, term2, term3")
    - entities: Comma-delimited list (e.g., "entity1, entity2, entity3")

    Optional columns (with defaults):
    - word_count: Default 1500
    - term_frequency: Default 3
    """
    # Flow: authenticate, parse the spreadsheet, resolve the project name
    # (--name wins over the spreadsheet value), obtain and validate the money
    # site URL, then create the project.
    #
    # Raises:
    #   click.Abort: on failed authentication, parse errors, missing project
    #     name, invalid URL, or any unexpected error (after printing to stderr).
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)

            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            click.echo(f"Authenticated as: {user.username} ({user.role})")

            click.echo(f"\nParsing simple spreadsheet: {file_path}")

            parser = SimpleSpreadsheetParser(file_path)
            data = parser.parse()

            project_name = name or data.get("project_name")
            if not project_name:
                click.echo("Error: Project name is required (provide via --name or in spreadsheet)", err=True)
                raise click.Abort()

            click.echo(f"Main Keyword: {data['main_keyword']}")
            click.echo(f"Project Name: {project_name}")
            click.echo(f"Word Count: {data['word_count']}")
            click.echo(f"Term Frequency: {data['term_frequency']}")
            click.echo(f"Entities: {len(data['entities'])}")
            click.echo(f"Related Searches: {len(data['related_searches'])}")

            # Preview at most five items of each list, with a "+N more" suffix.
            if data['entities']:
                click.echo(f" Entities: {', '.join(data['entities'][:5])}" + (f" ... (+{len(data['entities']) - 5} more)" if len(data['entities']) > 5 else ""))

            if data['related_searches']:
                click.echo(f" Related Searches: {', '.join(data['related_searches'][:5])}" + (f" ... (+{len(data['related_searches']) - 5} more)" if len(data['related_searches']) > 5 else ""))

            if not money_site_url:
                money_site_url = click.prompt(
                    "\nEnter money site URL (required for tiered linking)",
                    type=str
                )

            if not money_site_url.startswith('http://') and not money_site_url.startswith('https://'):
                click.echo("Error: Money site URL must start with http:// or https://", err=True)
                raise click.Abort()

            money_site_url = money_site_url.rstrip('/')

            click.echo(f"\nCreating project: {project_name}")
            click.echo(f"Money Site URL: {money_site_url}")

            data['money_site_url'] = money_site_url

            # The name is passed separately, so it is dropped from the data payload.
            project_data = {k: v for k, v in data.items() if k != 'project_name'}

            project_repo = ProjectRepository(session)
            project = project_repo.create(
                user_id=user.id,
                name=project_name,
                data=project_data
            )

            click.echo(f"\nSuccess: Project '{project.name}' created (ID: {project.id})")
            click.echo(f"Main Keyword: {project.main_keyword}")
            click.echo(f"Money Site URL: {project.money_site_url}")
            click.echo(f"Word Count: {project.word_count}")
            click.echo(f"Term Frequency: {project.term_frequency}")
            click.echo(f"Entities: {len(project.entities or [])}")
            click.echo(f"Related Searches: {len(project.related_searches or [])}")

        except CORAParseError as e:
            click.echo(f"Error parsing spreadsheet: {e}", err=True)
            raise click.Abort()
        except ValueError as e:
            click.echo(f"Error creating project: {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error ingesting spreadsheet: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command()
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
def list_projects(username: Optional[str], password: Optional[str]):
    """List all projects for the authenticated user"""
    # Admins see every project; other users see only their own. Output is a
    # fixed-width table of id, name, main keyword and creation time.
    #
    # Raises:
    #   click.Abort: on failed authentication or any unexpected error (after
    #     printing a message to stderr).
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)

            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            project_repo = ProjectRepository(session)

            if user.is_admin():
                projects = project_repo.get_all()
                click.echo(f"\nAll Projects (Admin View):")
            else:
                projects = project_repo.get_by_user_id(user.id)
                click.echo(f"\nYour Projects:")

            if not projects:
                click.echo("No projects found")
                return

            click.echo(f"Total projects: {len(projects)}")
            click.echo("-" * 80)
            click.echo(f"{'ID':<5} {'Name':<30} {'Keyword':<25} {'Created':<20}")
            click.echo("-" * 80)

            for project in projects:
                created_str = project.created_at.strftime('%Y-%m-%d %H:%M:%S')
                # Values are truncated one character short of the column width
                # so adjacent columns never touch.
                click.echo(f"{project.id:<5} {project.name[:29]:<30} {project.main_keyword[:24]:<25} {created_str:<20}")

            click.echo("-" * 80)

        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error listing projects: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("create-job")
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to create job file for')
@click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)')
@click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)')
@click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-pwd', help='Password for authentication')
def create_job(
    project_id: int,
    deployment_targets: tuple,
    tier1_count: int,
    tier2_count: int,
    tier1_branded_ratio: Optional[float],
    output: Optional[str],
    username: Optional[str],
    password: Optional[str]
):
    """Create a job file from an existing project ID"""
    # Flow: authenticate, load the project, choose the output path, write a
    # two-tier job template as JSON, then print a summary and a run hint.
    #
    # Args:
    #   project_id: id of an existing project.
    #   deployment_targets: hostnames written to the job as "deployment_targets";
    #     when empty, up to five known custom hostnames are listed as a hint.
    #   tier1_count / tier2_count: article counts written into the template.
    #   tier1_branded_ratio: NOTE(review): accepted but not used anywhere in
    #     this function - confirm whether it should be written to the template.
    #   output: explicit output path. It is used as given, and its parent
    #     directory is created if needed. Without it the file goes to
    #     jobs/<sanitized project name>.json, with a -YYMMDD suffix when that
    #     file already exists.
    # Raises:
    #   click.Abort: on failed authentication, unknown project, or any
    #     unexpected error (after printing a message to stderr).
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)

            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            project_repo = ProjectRepository(session)
            project = project_repo.get_by_id(project_id)

            if not project:
                click.echo(f"Error: Project {project_id} not found", err=True)
                raise click.Abort()

            deployment_targets_list = list(deployment_targets) if deployment_targets else None

            if not deployment_targets_list:
                site_repo = SiteDeploymentRepository(session)
                sites = site_repo.get_all()
                available_domains = [
                    site.custom_hostname
                    for site in sites
                    if site.custom_hostname is not None
                ]
                if available_domains:
                    click.echo(f"Available sites: {', '.join(available_domains[:5])}{'...' if len(available_domains) > 5 else ''}")
                click.echo("Note: No deployment_targets specified. You can add them manually to the job file.")

            # Lower-case the name and replace every character that is not
            # alphanumeric, '-' or '_' with '-' to get a safe file stem.
            sanitized_name = "".join(c if c.isalnum() or c in ('-', '_') else '-' for c in project.name.lower()).strip('-')
            sanitized_name = '-'.join(sanitized_name.split())

            jobs_dir = Path("jobs")
            jobs_dir.mkdir(exist_ok=True)

            if output:
                # An explicit path is honoured as given; only make sure the
                # directory it points into exists before writing.
                filepath = Path(output)
                filepath.parent.mkdir(parents=True, exist_ok=True)
            else:
                filepath = jobs_dir / f"{sanitized_name}.json"
                if filepath.exists():
                    # Avoid overwriting an earlier job for the same project name.
                    date_suffix = datetime.now().strftime("%y%m%d")
                    filepath = jobs_dir / f"{sanitized_name}-{date_suffix}.json"

            job_template = {
                "jobs": [
                    {
                        "project_id": project_id,
                        "tiers": {
                            "tier1": {
                                "count": tier1_count,
                                "min_word_count": 1250,
                                "max_word_count": 2000,
                                "models": {
                                    "title": "openai/gpt-4o-mini",
                                    "outline": "openai/gpt-4o-mini",
                                    "content": "anthropic/claude-3.5-sonnet"
                                }
                            },
                            "tier2": {
                                "count": tier2_count,
                                "min_word_count": 1000,
                                "max_word_count": 1250,
                                "models": {
                                    "title": "openai/gpt-4o-mini",
                                    "outline": "openai/gpt-4o-mini",
                                    "content": "openai/gpt-4o-mini"
                                },
                                "interlinking": {
                                    "links_per_article_min": 3,
                                    "links_per_article_max": 6
                                }
                            }
                        }
                    }
                ]
            }

            if deployment_targets_list:
                job_template["jobs"][0]["deployment_targets"] = deployment_targets_list

            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(job_template, f, indent=2)

            click.echo(f"\nJob file created: {filepath}")
            click.echo(f"Project: {project.name} (ID: {project_id})")
            click.echo(f"Tier1: {tier1_count} articles")
            click.echo(f"Tier2: {tier2_count} articles")
            if deployment_targets_list:
                click.echo(f"Deployment targets: {', '.join(deployment_targets_list)}")
            click.echo(f"\nTo run this job:")
            click.echo(f" uv run python main.py generate-batch --job-file {filepath} -u {username} --password <password>")

        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error creating job file: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("generate-batch")
@click.option('--job-file', '-j', required=True, type=click.Path(exists=True),
              help='Path to job JSON file')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
@click.option('--debug', is_flag=True, help='Save AI responses to debug_output/')
@click.option('--continue-on-error', is_flag=True,
              help='Continue processing if article generation fails')
@click.option('--model', '-m', default='gpt-4o-mini',
              help='AI model to use (gpt-4o-mini, x-ai/grok-4-fast)')
def generate_batch(
    job_file: str,
    username: Optional[str],
    password: Optional[str],
    debug: bool,
    continue_on_error: bool,
    model: str
):
    """Generate content batch from job file"""
    # Flow: resolve credentials (CLI options, then CLIENT_USERNAME /
    # CLIENT_PASSWORD from the environment, then an interactive prompt),
    # authenticate, build the AI client and batch processor, run the job, and
    # finally move the job file into jobs/done/.
    #
    # Args:
    #   job_file: path to the job JSON file.
    #   debug / continue_on_error: forwarded to BatchProcessor.process_job.
    #   model: default model for the AI client; a warning is printed when the
    #     job file carries its own model configuration and a non-default
    #     --model was also given.
    # Raises:
    #   click.Abort: on failed authentication, missing OPENROUTER_API_KEY, or
    #     any unexpected error (after printing a message to stderr).
    try:
        if not username:
            username = os.getenv("CLIENT_USERNAME")
        if not password:
            password = os.getenv("CLIENT_PASSWORD")
        if not username or not password:
            username, password = prompt_admin_credentials()

        session = db_manager.get_session()
        try:
            user_repo = UserRepository(session)
            auth_service = AuthService(user_repo)

            user = auth_service.authenticate_user(username, password)
            if not user:
                click.echo("Error: Authentication failed", err=True)
                raise click.Abort()

            click.echo(f"Authenticated as: {user.username} ({user.role})")

            api_key = os.getenv("OPENROUTER_API_KEY")
            if not api_key:
                click.echo("Error: OPENROUTER_API_KEY not found in environment", err=True)
                click.echo("Please set OPENROUTER_API_KEY in your .env file", err=True)
                raise click.Abort()

            from src.generation.job_config import JobConfig
            job_config = JobConfig(job_file)
            jobs = job_config.get_jobs()

            has_models_in_job = any(job.models is not None for job in jobs)
            if has_models_in_job and model != 'gpt-4o-mini':
                click.echo(f"Warning: Job file contains per-stage model configuration.")
                click.echo(f" The --model flag will be ignored in favor of job config.\n")

            click.echo(f"Initializing AI client with default model: {model}")
            ai_client = AIClient(api_key=api_key, model=model)
            prompt_manager = PromptManager()

            project_repo = ProjectRepository(session)
            content_repo = GeneratedContentRepository(session)
            site_deployment_repo = SiteDeploymentRepository(session)

            content_generator = ContentGenerator(
                ai_client=ai_client,
                prompt_manager=prompt_manager,
                project_repo=project_repo,
                content_repo=content_repo
            )

            # The first job's max_workers, when set, overrides the configured default.
            max_workers = get_concurrent_workers()
            job_max_workers = jobs[0].max_workers if jobs and jobs[0].max_workers else None
            final_max_workers = job_max_workers or max_workers

            batch_processor = BatchProcessor(
                content_generator=content_generator,
                content_repo=content_repo,
                project_repo=project_repo,
                site_deployment_repo=site_deployment_repo,
                max_workers=final_max_workers
            )

            click.echo(f"\nProcessing job file: {job_file}")
            click.echo(f"Concurrent workers: {final_max_workers}")
            if debug:
                click.echo("Debug mode: AI responses will be saved to debug_output/\n")

            batch_processor.process_job(
                job_file_path=job_file,
                debug=debug,
                continue_on_error=continue_on_error
            )

            # Archive the processed job file under jobs/done/.
            done_dir = os.path.join("jobs", "done")
            os.makedirs(done_dir, exist_ok=True)

            job_path = job_file
            job_filename = os.path.basename(job_path)
            destination = os.path.join(done_dir, job_filename)

            if os.path.exists(job_path):
                # os.replace overwrites an existing archived file on every
                # platform; os.rename raises on Windows in that case, which
                # would report a finished batch as failed.
                os.replace(job_path, destination)
                click.echo(f"\nJob file moved to: {destination}")

        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error processing batch: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("deploy-batch")
@click.option('--batch-id', '-b', required=True, type=int, help='Project/batch ID to deploy')
@click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication')
@click.option('--continue-on-error/--no-continue-on-error', default=True,
              help='Continue if file fails (default: True)')
@click.option('--dry-run', is_flag=True, help='Preview what would be deployed')
def deploy_batch(
    batch_id: int,
    username: Optional[str],
    password: Optional[str],
    continue_on_error: bool,
    dry_run: bool
):
    """Deploy all content in a batch to cloud storage"""
    # Flow: authenticate as admin, load the project and its articles, then
    # either preview the target file paths / URLs (--dry-run) or hand the batch
    # to DeploymentService and print a summary.
    #
    # Args:
    #   batch_id: project id whose generated content is deployed.
    #   continue_on_error: forwarded to DeploymentService.deploy_batch. It is
    #     declared as an on/off flag pair because a plain is_flag option with
    #     default=True cannot be switched off from the command line.
    #   dry_run: list what would be deployed without uploading anything.
    # Raises:
    #   click.Abort: on failed authentication, unknown project, storage errors,
    #     any failed article/page upload, or any unexpected error.
    try:
        if not username or not password:
            username, password = prompt_admin_credentials()

        admin = authenticate_admin(username, password)
        if not admin:
            click.echo("Error: Authentication failed or insufficient permissions", err=True)
            raise click.Abort()

        click.echo(f"Authenticated as: {admin.username} ({admin.role})")

        session = db_manager.get_session()

        try:
            project_repo = ProjectRepository(session)
            content_repo = GeneratedContentRepository(session)
            site_repo = SiteDeploymentRepository(session)
            page_repo = SitePageRepository(session)

            project = project_repo.get_by_id(batch_id)
            if not project:
                click.echo(f"Error: Project/batch {batch_id} not found", err=True)
                raise click.Abort()

            click.echo(f"\nDeploying batch: {project.name} (ID: {batch_id})")
            click.echo(f"Keyword: {project.main_keyword}")

            articles = content_repo.get_by_project_id(batch_id)
            click.echo(f"Found {len(articles)} articles")

            if dry_run:
                click.echo("\nDRY RUN MODE - No files will be uploaded\n")

                # Imported once here instead of on every loop iteration.
                from src.generation.url_generator import generate_file_path, generate_public_url

                for article in articles:
                    if not article.site_deployment_id:
                        click.echo(f"SKIP: Article {article.id} - No site assigned")
                        continue

                    site = site_repo.get_by_id(article.site_deployment_id)
                    if not site:
                        click.echo(f"SKIP: Article {article.id} - Site not found")
                        continue

                    file_path = generate_file_path(article)
                    url = generate_public_url(site, file_path)

                    click.echo(f"WOULD DEPLOY: {article.title[:50]}")
                    click.echo(f" File: {file_path}")
                    click.echo(f" URL: {url}")

                # Also list the pages stored for every site the batch touches.
                site_ids = set(a.site_deployment_id for a in articles if a.site_deployment_id)
                for site_id in site_ids:
                    pages = page_repo.get_by_site(site_id)
                    for page in pages:
                        click.echo(f"WOULD DEPLOY: {page.page_type}.html")

                click.echo("\nDry run complete. Use without --dry-run to actually deploy.")
                return

            url_logger = URLLogger()

            deployment_service = DeploymentService(
                content_repo=content_repo,
                site_repo=site_repo,
                page_repo=page_repo,
                url_logger=url_logger
            )

            click.echo(f"\nStarting deployment...")
            click.echo(f"Continue on error: {continue_on_error}")
            click.echo("")

            results = deployment_service.deploy_batch(
                project_id=batch_id,
                continue_on_error=continue_on_error
            )

            click.echo("\n" + "=" * 70)
            click.echo("Deployment Summary")
            click.echo("=" * 70)
            click.echo(f"Articles deployed: {results['articles_deployed']}")
            click.echo(f"Articles failed: {results['articles_failed']}")
            click.echo(f"Pages deployed: {results['pages_deployed']}")
            click.echo(f"Pages failed: {results['pages_failed']}")
            click.echo(f"Total time: {results['total_time']:.1f}s")

            if results['errors']:
                click.echo("\nErrors:")
                for error in results['errors']:
                    if error['type'] == 'article':
                        click.echo(f" Article {error['id']} ({error.get('title', 'N/A')[:40]}): {error['error']}")
                    else:
                        click.echo(f" Page {error.get('page_type', 'N/A')} (Site {error.get('site_id')}): {error['error']}")

            click.echo("=" * 70)

            # Abort when anything failed; the summary above already explains why.
            if results['articles_failed'] > 0 or results['pages_failed'] > 0:
                raise click.Abort()

        except BunnyStorageError as e:
            click.echo(f"\nError: Storage upload failed - {e}", err=True)
            raise click.Abort()
        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so the generic handler below
        # would otherwise catch it and print a second, empty error line.
        raise
    except Exception as e:
        click.echo(f"Error deploying batch: {e}", err=True)
        raise click.Abort()
|
|
|
|
|
|
@app.command("verify-deployment")
@click.option('--batch-id', '-b', required=True, type=int, help='Project/batch ID to verify')
@click.option('--sample', '-s', type=int, help='Number of random URLs to check (default: check all)')
@click.option('--timeout', '-t', type=int, default=10, help='Request timeout in seconds (default: 10)')
def verify_deployment(batch_id: int, sample: Optional[int], timeout: int) -> None:
    """Verify deployed URLs return 200 OK status"""
    # NOTE: the docstring above is what Click prints as the command's help
    # text, so the longer description is kept in comments instead.
    #
    # Flow:
    #   1. Load the project identified by --batch-id (abort if missing).
    #   2. Collect its articles that have a deployed_url and status 'deployed'.
    #   3. Optionally narrow them to a random sample of --sample articles.
    #   4. GET every URL (following redirects) and print a per-URL result.
    #   5. Print a summary and abort if at least one URL did not return 200.
    #
    # Raises click.Abort when the project does not exist, when --sample is
    # negative, when any URL fails verification, or on any unexpected error.
    try:
        session = db_manager.get_session()

        try:
            content_repo = GeneratedContentRepository(session)
            project_repo = ProjectRepository(session)

            project = project_repo.get_by_id(batch_id)
            if not project:
                click.echo(f"Error: Project/batch {batch_id} not found", err=True)
                raise click.Abort()

            click.echo(f"Verifying deployment for batch: {project.name} (ID: {batch_id})")
            click.echo(f"Keyword: {project.main_keyword}\n")

            articles = content_repo.get_by_project_id(batch_id)
            deployed_articles = [a for a in articles if a.deployed_url and a.status == 'deployed']

            if not deployed_articles:
                click.echo("No deployed articles found for this batch.")
                return

            click.echo(f"Found {len(deployed_articles)} deployed articles")

            # random.sample() rejects negative sizes with an opaque ValueError;
            # report the bad option value explicitly instead.
            if sample is not None and sample < 0:
                click.echo(f"Error: --sample must not be negative (got {sample})", err=True)
                raise click.Abort()

            # A sample of 0 / None, or one at least as large as the
            # population, means "check everything".
            urls_to_check = deployed_articles
            if sample and sample < len(deployed_articles):
                urls_to_check = random.sample(deployed_articles, sample)
                click.echo(f"Checking random sample of {sample} URLs\n")
            else:
                click.echo(f"Checking all {len(deployed_articles)} URLs\n")

            successful = []  # (url, title)
            failed = []      # (url, title, HTTP status code or error text)

            for article in urls_to_check:
                url = article.deployed_url
                try:
                    response = requests.get(url, timeout=timeout, allow_redirects=True)
                    if response.status_code == 200:
                        successful.append((url, article.title))
                        click.echo(f"✓ {url}")
                    else:
                        failed.append((url, article.title, response.status_code))
                        click.echo(f"✗ {url} (HTTP {response.status_code})")
                except requests.exceptions.RequestException as e:
                    # Network-level failure (timeout, DNS, connection reset...):
                    # record it and keep checking the remaining URLs.
                    failed.append((url, article.title, str(e)))
                    click.echo(f"✗ {url} (Error: {type(e).__name__})")

            click.echo("\n" + "=" * 70)
            click.echo("Verification Summary")
            click.echo("=" * 70)
            click.echo(f"Total checked: {len(urls_to_check)}")
            click.echo(f"Successful: {len(successful)}")
            click.echo(f"Failed: {len(failed)}")

            if failed:
                click.echo("\nFailed URLs:")
                for url, title, error in failed:
                    # Guard against a missing title so the summary is never
                    # cut short by a TypeError on len(None).
                    title = title or ''
                    title_preview = title[:50] + "..." if len(title) > 50 else title
                    click.echo(f" {url}")
                    click.echo(f" Title: {title_preview}")
                    click.echo(f" Error: {error}")

            click.echo("=" * 70)

            # Signal failure to the caller / shell when any URL was not OK.
            if failed:
                raise click.Abort()

        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would print an empty "Error verifying
        # deployment: " line for every deliberate abort raised above.
        raise
    except Exception as e:
        click.echo(f"Error verifying deployment: {e}", err=True)
        raise click.Abort() from e
|
|
|
|
|
|
@app.command("get-links")
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to get links for')
@click.option('--tier', '-t', required=True, help='Tier to filter (e.g., "1" or "2+" for tier 2 and above)')
@click.option('--with-anchor-text', is_flag=True, help='Include anchor text used for tiered links')
@click.option('--with-destination-url', is_flag=True, help='Include destination URL that the article links to')
def get_links(project_id: int, tier: str, with_anchor_text: bool, with_destination_url: bool) -> None:
    """Export article URLs with optional link details for a project and tier"""
    # NOTE: the docstring above is what Click prints as the command's help
    # text, so the longer description is kept in comments instead.
    #
    # Writes CSV to stdout with the columns article_url, tier, title and,
    # depending on the flags, anchor_text and/or destination_url.
    #   * Without link flags: one row per deployed article matching --tier.
    #   * With link flags: one row per 'tiered' link of each article, or a
    #     single row with blank link columns when the article has none.
    #
    # --tier accepts an exact tier ("1", "2") or a lower bound ("2+").
    #
    # Raises click.Abort when the project does not exist, the tier filter is
    # malformed, no deployed article matches, or on any unexpected error.
    import csv
    import sys
    from src.database.repositories import ArticleLinkRepository

    def _tier_number(tier_label):
        """Return N for a 'tierN' label, or None when it cannot be parsed."""
        if not isinstance(tier_label, str):
            return None
        try:
            return int(tier_label.replace('tier', ''))
        except ValueError:
            return None

    try:
        session = db_manager.get_session()

        try:
            content_repo = GeneratedContentRepository(session)
            project_repo = ProjectRepository(session)
            link_repo = ArticleLinkRepository(session)

            project = project_repo.get_by_id(project_id)
            if not project:
                click.echo(f"Error: Project {project_id} not found", err=True)
                raise click.Abort()

            # "2+" selects tier 2 and above; a bare number selects that tier only.
            tier_range_mode = tier.endswith('+')
            try:
                min_tier = int(tier[:-1] if tier_range_mode else tier)
            except ValueError:
                click.echo(f"Error: Invalid tier format '{tier}'. Use '1', '2', or '2+'", err=True)
                raise click.Abort()

            all_articles = content_repo.get_by_project_id(project_id)

            if tier_range_mode:
                # Articles whose tier label cannot be parsed are skipped rather
                # than crashing the export, mirroring exact mode, where they
                # simply never match.
                articles = []
                for candidate in all_articles:
                    if not candidate.deployed_url:
                        continue
                    tier_value = _tier_number(candidate.tier)
                    if tier_value is not None and tier_value >= min_tier:
                        articles.append(candidate)
            else:
                tier_str = f"tier{min_tier}"
                articles = [a for a in all_articles if a.deployed_url and a.tier == tier_str]

            if not articles:
                click.echo(f"No deployed articles found for project {project_id} with tier filter '{tier}'", err=True)
                raise click.Abort()

            csv_writer = csv.writer(sys.stdout)

            header = ['article_url', 'tier', 'title']
            if with_anchor_text:
                header.append('anchor_text')
            if with_destination_url:
                header.append('destination_url')
            csv_writer.writerow(header)

            include_link_details = with_anchor_text or with_destination_url
            # Number of link columns to leave blank for articles without tiered links.
            blank_link_cells = [''] * (int(with_anchor_text) + int(with_destination_url))
            # Memoise target-article lookups: many links may point at the same
            # article, and each lookup goes through the repository.
            destination_cache = {}

            for article in articles:
                row = [article.deployed_url, article.tier, article.title]

                if not include_link_details:
                    csv_writer.writerow(row)
                    continue

                tiered_links = [
                    link for link in link_repo.get_by_source_article(article.id)
                    if link.link_type == 'tiered'
                ]

                if not tiered_links:
                    csv_writer.writerow(row + blank_link_cells)
                    continue

                for link in tiered_links:
                    row_with_link = row.copy()

                    if with_anchor_text:
                        row_with_link.append(link.anchor_text or '')

                    if with_destination_url:
                        if link.to_url:
                            # The link stores an explicit destination URL.
                            destination = link.to_url
                        elif link.to_content_id:
                            # The link points at another article: use that
                            # article's deployed URL (blank if not deployed).
                            target_id = link.to_content_id
                            if target_id not in destination_cache:
                                target_article = content_repo.get_by_id(target_id)
                                destination_cache[target_id] = (
                                    target_article.deployed_url
                                    if target_article and target_article.deployed_url
                                    else ''
                                )
                            destination = destination_cache[target_id]
                        else:
                            destination = ''
                        row_with_link.append(destination)

                    csv_writer.writerow(row_with_link)

        finally:
            session.close()

    except click.Abort:
        # click.Abort derives from RuntimeError, so without this clause the
        # generic handler below would print an empty "Error getting links: "
        # line after the specific message already shown above.
        raise
    except Exception as e:
        click.echo(f"Error getting links: {e}", err=True)
        raise click.Abort() from e
|
|
|
|
|
|
|
|
|
|
# Script entry point: invoke the Click application only when this module is
# executed directly, not when it is imported.
if __name__ == "__main__":
    app()
|