Add image generation using fal.ai API

- Implement ImageGenerator class with hero and content image generation
- Add image theme prompt generation and caching
- Integrate with fal.ai flux-1/schnell API
- Add image upload to storage (Bunny CDN)
- Add image injection into HTML content
- Add test script for image generation
- Update database models and repositories for image fields
- Fix API usage: use arguments parameter and image_size object format
main
PeninsulaInd 2025-11-19 17:09:20 -06:00
parent 01db5cc1c6
commit 8379313f51
12 changed files with 1072 additions and 7 deletions

View File

@ -49,6 +49,9 @@ CLOUDFLARE_ACCOUNT_ID=your_cloudflare_account_id_here
LINK_BUILDER_API_URL=http://localhost:8001/api
LINK_BUILDER_API_KEY=your_link_builder_api_key_here
# fal.ai Image Generation API
FAL_API_KEY=your_fal_api_key_here
# Application Configuration
LOG_LEVEL=INFO
ENVIRONMENT=development

View File

@ -32,7 +32,7 @@ beautifulsoup4==4.14.2
# AI/ML
openai==2.5.0
fal-client==0.9.1
# Testing
pytest==8.4.2
pytest-asyncio==0.21.1

View File

@ -0,0 +1,101 @@
"""
Migration script to add image fields to projects and generated_content tables
Story 7.1: Generate and Insert Images into Articles
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.database.session import db_manager
from sqlalchemy import text
def migrate():
"""Add image fields to projects and generated_content tables"""
session = db_manager.get_session()
try:
print("Starting migration: Add image fields...")
print(" Adding image_theme_prompt to projects table...")
session.execute(text("""
ALTER TABLE projects
ADD COLUMN image_theme_prompt TEXT NULL
"""))
print(" Adding hero_image_url to generated_content table...")
session.execute(text("""
ALTER TABLE generated_content
ADD COLUMN hero_image_url TEXT NULL
"""))
print(" Adding content_images to generated_content table...")
session.execute(text("""
ALTER TABLE generated_content
ADD COLUMN content_images JSON NULL
"""))
session.commit()
print("Migration completed successfully!")
print("\nNew fields added:")
print(" - projects.image_theme_prompt (TEXT, nullable)")
print(" - generated_content.hero_image_url (TEXT, nullable)")
print(" - generated_content.content_images (JSON, nullable)")
except Exception as e:
session.rollback()
print(f"Migration failed: {e}")
raise
finally:
session.close()
def rollback():
"""Rollback migration (remove image fields)"""
session = db_manager.get_session()
try:
print("Rolling back migration: Remove image fields...")
print(" Removing content_images column...")
session.execute(text("""
ALTER TABLE generated_content
DROP COLUMN content_images
"""))
print(" Removing hero_image_url column...")
session.execute(text("""
ALTER TABLE generated_content
DROP COLUMN hero_image_url
"""))
print(" Removing image_theme_prompt column...")
session.execute(text("""
ALTER TABLE projects
DROP COLUMN image_theme_prompt
"""))
session.commit()
print("Rollback completed successfully!")
except Exception as e:
session.rollback()
print(f"Rollback failed: {e}")
raise
finally:
session.close()
if __name__ == "__main__":
if len(sys.argv) > 1 and sys.argv[1] == "rollback":
rollback()
else:
migrate()

View File

@ -0,0 +1,288 @@
"""
Test script to generate images for existing articles
Tests image generation on project 23: first 2 T1 articles and first 3 T2 articles
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.database.session import db_manager
from src.database.repositories import (
ProjectRepository,
GeneratedContentRepository,
SiteDeploymentRepository
)
from src.generation.service import ContentGenerator
from src.generation.ai_client import AIClient, PromptManager
from src.generation.image_generator import ImageGenerator, truncate_title, slugify
from src.generation.image_injection import insert_hero_after_h1, insert_content_images_after_h2s, generate_alt_text
from src.generation.image_upload import upload_image_to_storage
from src.deployment.bunny_storage import BunnyStorageClient
from src.core.config import get_config
import click
import random
from pathlib import Path
def test_image_generation(project_id: int):
"""Test image generation on existing articles"""
# Create output directory for test images
output_dir = Path("test_images")
output_dir.mkdir(exist_ok=True)
click.echo(f"Test images will be saved to: {output_dir.absolute()}\n")
session = db_manager.get_session()
try:
# Get repositories
project_repo = ProjectRepository(session)
content_repo = GeneratedContentRepository(session)
site_repo = SiteDeploymentRepository(session)
# Get project
project = project_repo.get_by_id(project_id)
if not project:
click.echo(f"Project {project_id} not found")
return
click.echo(f"\n{'='*60}")
click.echo(f"Testing Image Generation for Project {project_id}")
click.echo(f"Project: {project.name}")
click.echo(f"Main Keyword: {project.main_keyword}")
click.echo(f"{'='*60}\n")
# Get articles
t1_articles = content_repo.get_by_project_and_tier(project_id, "tier1", require_site=False)
t2_articles = content_repo.get_by_project_and_tier(project_id, "tier2", require_site=False)
click.echo(f"Found {len(t1_articles)} T1 articles, using first 2")
click.echo(f"Found {len(t2_articles)} T2 articles, using first 3\n")
# Initialize AI client and image generator
import os
from dotenv import load_dotenv
load_dotenv()
api_key = os.getenv("OPENROUTER_API_KEY")
if not api_key:
click.echo("Error: OPENROUTER_API_KEY not set in environment", err=True)
return
fal_api_key = os.getenv("FAL_API_KEY")
if not fal_api_key:
click.echo("\n[WARN] FAL_API_KEY not set - image generation will fail")
click.echo(" Set FAL_API_KEY in your .env file to test image generation\n")
ai_client = AIClient(
api_key=api_key,
model=os.getenv("AI_MODEL", "gpt-4o-mini")
)
prompt_manager = PromptManager()
image_generator = ImageGenerator(
ai_client=ai_client,
prompt_manager=prompt_manager,
project_repo=project_repo
)
storage_client = BunnyStorageClient()
# Test T1 articles (first 2)
click.echo(f"\n{'='*60}")
click.echo("T1 ARTICLES")
click.echo(f"{'='*60}\n")
for i, article in enumerate(t1_articles[:2], 1):
click.echo(f"\n--- T1 Article {i}: {article.title[:60]}... ---")
if not article.site_deployment_id:
click.echo(" [WARN] No site assigned, skipping image upload")
site = None
else:
site = site_repo.get_by_id(article.site_deployment_id)
if not site:
click.echo(" [WARN] Site not found, skipping image upload")
site = None
# Generate theme prompt (if not exists)
click.echo("\n1. Theme Prompt:")
if project.image_theme_prompt:
click.echo(f" (Using cached): {project.image_theme_prompt}")
else:
click.echo(" Generating theme prompt...")
theme = image_generator.get_theme_prompt(project_id)
click.echo(f" Generated: {theme}")
# Generate hero image
click.echo("\n2. Hero Image:")
try:
# Show the prompt that will be used
theme = image_generator.get_theme_prompt(project_id)
title_short = truncate_title(article.title, 4)
hero_prompt = f"{theme} Text: '{title_short}' in clean simple uppercase letters, positioned in middle of image."
click.echo(f" Prompt: {hero_prompt}")
hero_image = image_generator.generate_hero_image(
project_id=project_id,
title=article.title,
width=1280,
height=720
)
if hero_image:
click.echo(f" [OK] Generated ({len(hero_image):,} bytes)")
# Save to local file
main_keyword_slug = slugify(project.main_keyword)
local_file = output_dir / f"hero-t1-{main_keyword_slug}-{i}.jpg"
local_file.write_bytes(hero_image)
click.echo(f" [OK] Saved to: {local_file}")
if site:
file_path = f"images/{main_keyword_slug}.jpg"
hero_url = upload_image_to_storage(storage_client, site, hero_image, file_path)
if hero_url:
click.echo(f" [OK] Uploaded: {hero_url}")
else:
click.echo(" [FAIL] Upload failed")
else:
click.echo(" (Skipped upload - no site)")
else:
click.echo(" [FAIL] Generation failed")
except Exception as e:
click.echo(f" [ERROR] {str(e)[:200]}")
# Generate content images (1-3 for T1)
click.echo("\n3. Content Images:")
num_content_images = random.randint(1, 3)
click.echo(f" Generating {num_content_images} content image(s)...")
entities = project.entities or []
related_searches = project.related_searches or []
if not entities or not related_searches:
click.echo(" [WARN] No entities/related_searches, skipping")
else:
for j in range(num_content_images):
entity = random.choice(entities)
related_search = random.choice(related_searches)
click.echo(f"\n Image {j+1}/{num_content_images}:")
click.echo(f" Entity: {entity}")
click.echo(f" Related Search: {related_search}")
try:
# Show the prompt that will be used
theme = image_generator.get_theme_prompt(project_id)
content_prompt = f"{theme} Focus on {entity} and {related_search}, professional illustration style."
click.echo(f" Prompt: {content_prompt}")
content_image = image_generator.generate_content_image(
project_id=project_id,
entity=entity,
related_search=related_search,
width=512,
height=512
)
if content_image:
click.echo(f" [OK] Generated ({len(content_image):,} bytes)")
# Save to local file
main_keyword_slug = slugify(project.main_keyword)
entity_slug = slugify(entity)
related_slug = slugify(related_search)
local_file = output_dir / f"content-{main_keyword_slug}-{i}-{j+1}-{entity_slug}-{related_slug}.jpg"
local_file.write_bytes(content_image)
click.echo(f" [OK] Saved to: {local_file}")
if site:
file_path = f"images/{main_keyword_slug}-{entity_slug}-{related_slug}.jpg"
img_url = upload_image_to_storage(storage_client, site, content_image, file_path)
if img_url:
click.echo(f" [OK] Uploaded: {img_url}")
else:
click.echo(" [FAIL] Upload failed")
else:
click.echo(" (Skipped upload - no site)")
else:
click.echo(" [FAIL] Generation failed")
except Exception as e:
click.echo(f" [ERROR] {str(e)[:200]}")
# Test T2 articles (first 3)
click.echo(f"\n\n{'='*60}")
click.echo("T2 ARTICLES")
click.echo(f"{'='*60}\n")
for i, article in enumerate(t2_articles[:3], 1):
click.echo(f"\n--- T2 Article {i}: {article.title[:60]}... ---")
if not article.site_deployment_id:
click.echo(" [WARN] No site assigned, skipping image upload")
site = None
else:
site = site_repo.get_by_id(article.site_deployment_id)
if not site:
click.echo(" [WARN] Site not found, skipping image upload")
site = None
# Generate hero image only (T2 doesn't get content images by default)
click.echo("\n1. Hero Image:")
try:
# Show the prompt that will be used
theme = image_generator.get_theme_prompt(project_id)
title_short = truncate_title(article.title, 4)
hero_prompt = f"{theme} Text: '{title_short}' in clean simple uppercase letters, positioned in middle of image."
click.echo(f" Prompt: {hero_prompt}")
hero_image = image_generator.generate_hero_image(
project_id=project_id,
title=article.title,
width=1280,
height=720
)
if hero_image:
click.echo(f" [OK] Generated ({len(hero_image):,} bytes)")
# Save to local file
main_keyword_slug = slugify(project.main_keyword)
local_file = output_dir / f"hero-t2-{main_keyword_slug}-{i}.jpg"
local_file.write_bytes(hero_image)
click.echo(f" [OK] Saved to: {local_file}")
if site:
file_path = f"images/{main_keyword_slug}.jpg"
hero_url = upload_image_to_storage(storage_client, site, hero_image, file_path)
if hero_url:
click.echo(f" [OK] Uploaded: {hero_url}")
else:
click.echo(" [FAIL] Upload failed")
else:
click.echo(" (Skipped upload - no site)")
else:
click.echo(" [FAIL] Generation failed")
except Exception as e:
click.echo(f" [ERROR] {str(e)[:200]}")
click.echo("\n2. Content Images:")
click.echo(" (Skipped - T2 articles don't get content images by default)")
click.echo(f"\n\n{'='*60}")
click.echo("TEST COMPLETE")
click.echo(f"{'='*60}\n")
except Exception as e:
click.echo(f"Error: {e}", err=True)
import traceback
traceback.print_exc()
finally:
session.close()
if __name__ == "__main__":
test_image_generation(23)

View File

@ -109,6 +109,7 @@ class Project(Base):
custom_anchor_text: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
spintax_related_search_terms: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
image_theme_prompt: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
updated_at: Mapped[datetime] = mapped_column(
@ -140,6 +141,8 @@ class GeneratedContent(Base):
site_deployment_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey('site_deployments.id'), nullable=True, index=True)
deployed_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
deployed_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True, index=True)
hero_image_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
content_images: Mapped[Optional[list]] = mapped_column(JSON, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False)
updated_at: Mapped[datetime] = mapped_column(
DateTime,

View File

@ -411,7 +411,9 @@ class GeneratedContentRepository:
content: str,
word_count: int,
status: str,
site_deployment_id: Optional[int] = None
site_deployment_id: Optional[int] = None,
hero_image_url: Optional[str] = None,
content_images: Optional[list] = None
) -> GeneratedContent:
"""
Create a new generated content record
@ -439,7 +441,9 @@ class GeneratedContentRepository:
content=content,
word_count=word_count,
status=status,
site_deployment_id=site_deployment_id
site_deployment_id=site_deployment_id,
hero_image_url=hero_image_url,
content_images=content_images
)
self.session.add(content_record)

View File

@ -21,6 +21,11 @@ from src.generation.site_assignment import assign_sites_to_batch
from src.deployment.bunny_storage import BunnyStorageClient
from src.deployment.deployment_service import DeploymentService
from src.deployment.url_logger import URLLogger
from src.generation.image_generator import ImageGenerator
from src.generation.image_injection import insert_hero_after_h1, insert_content_images_after_h2s, generate_alt_text
from src.generation.image_upload import upload_image_to_storage
from src.generation.image_generator import slugify
import random
class BatchProcessor:
@ -352,6 +357,17 @@ class BatchProcessor:
status = "augmented"
self.stats["augmented_articles"] += 1
# Generate and insert images
content, hero_url, content_image_urls = self._generate_and_insert_images(
project_id=project_id,
tier_name=tier_name,
tier_config=tier_config,
title=title,
content=content,
site_deployment_id=site_deployment_id,
prefix=prefix
)
saved_content = self.content_repo.create(
project_id=project_id,
tier=tier_name,
@ -361,11 +377,128 @@ class BatchProcessor:
content=content,
word_count=word_count,
status=status,
site_deployment_id=site_deployment_id
site_deployment_id=site_deployment_id,
hero_image_url=hero_url,
content_images=content_image_urls if content_image_urls else None
)
click.echo(f"{prefix} Saved (ID: {saved_content.id}, Status: {status})")
def _generate_and_insert_images(
self,
project_id: int,
tier_name: str,
tier_config: TierConfig,
title: str,
content: str,
site_deployment_id: Optional[int],
prefix: str
) -> tuple[str, Optional[str], List[str]]:
"""
Generate images and insert into HTML content
Note: image_config is always created by job config parser (with defaults if not in JSON).
Defaults: hero images for all tiers (1280x720), content images for T1 only (1-3 images).
"""
if not tier_config.image_config:
return content, None, []
project = self.project_repo.get_by_id(project_id)
if not project:
return content, None, []
# Initialize image generator
image_generator = ImageGenerator(
ai_client=self.generator.ai_client,
prompt_manager=self.generator.prompt_manager,
project_repo=self.project_repo
)
storage_client = BunnyStorageClient()
hero_url = None
content_image_urls = []
# Generate hero image (all tiers if enabled)
if tier_config.image_config.hero:
try:
click.echo(f"{prefix} Generating hero image...")
hero_image = image_generator.generate_hero_image(
project_id=project_id,
title=title,
width=tier_config.image_config.hero.width,
height=tier_config.image_config.hero.height
)
if hero_image and site_deployment_id:
site = self.site_deployment_repo.get_by_id(site_deployment_id) if self.site_deployment_repo else None
if site:
main_keyword_slug = slugify(project.main_keyword)
file_path = f"images/{main_keyword_slug}.jpg"
hero_url = upload_image_to_storage(storage_client, site, hero_image, file_path)
if hero_url:
click.echo(f"{prefix} Hero image uploaded: {hero_url}")
else:
click.echo(f"{prefix} Hero image upload failed")
except Exception as e:
click.echo(f"{prefix} Hero image generation failed: {e}")
# Generate content images (T1 only, if enabled)
if tier_config.image_config.content and tier_config.image_config.content.max_num_images > 0:
try:
num_images = random.randint(
tier_config.image_config.content.min_num_images,
tier_config.image_config.content.max_num_images
)
if num_images > 0:
click.echo(f"{prefix} Generating {num_images} content image(s)...")
entities = project.entities or []
related_searches = project.related_searches or []
if not entities or not related_searches:
click.echo(f"{prefix} Skipping content images (no entities/related_searches)")
else:
for i in range(num_images):
try:
entity = random.choice(entities)
related_search = random.choice(related_searches)
content_image = image_generator.generate_content_image(
project_id=project_id,
entity=entity,
related_search=related_search,
width=tier_config.image_config.content.width,
height=tier_config.image_config.content.height
)
if content_image and site_deployment_id:
site = self.site_deployment_repo.get_by_id(site_deployment_id) if self.site_deployment_repo else None
if site:
main_keyword_slug = slugify(project.main_keyword)
entity_slug = slugify(entity)
related_slug = slugify(related_search)
file_path = f"images/{main_keyword_slug}-{entity_slug}-{related_slug}.jpg"
img_url = upload_image_to_storage(storage_client, site, content_image, file_path)
if img_url:
content_image_urls.append(img_url)
click.echo(f"{prefix} Content image {i+1}/{num_images} uploaded")
except Exception as e:
click.echo(f"{prefix} Content image {i+1} generation failed: {e}")
except Exception as e:
click.echo(f"{prefix} Content image generation failed: {e}")
# Insert images into HTML
if hero_url:
alt_text = generate_alt_text(project)
content = insert_hero_after_h1(content, hero_url, alt_text)
if content_image_urls:
alt_texts = [generate_alt_text(project) for _ in content_image_urls]
content = insert_content_images_after_h2s(content, content_image_urls, alt_texts)
return content, hero_url, content_image_urls
def _process_articles_concurrent(
self,
article_tasks: List[Dict[str, Any]],
@ -547,6 +680,17 @@ class BatchProcessor:
with self.stats_lock:
self.stats["augmented_articles"] += 1
# Generate and insert images
content, hero_url, content_image_urls = self._generate_and_insert_images(
project_id=project_id,
tier_name=tier_name,
tier_config=tier_config,
title=title,
content=content,
site_deployment_id=site_deployment_id,
prefix=prefix
)
saved_content = thread_content_repo.create(
project_id=project_id,
tier=tier_name,
@ -556,7 +700,9 @@ class BatchProcessor:
content=content,
word_count=word_count,
status=status,
site_deployment_id=site_deployment_id
site_deployment_id=site_deployment_id,
hero_image_url=hero_url,
content_images=content_image_urls if content_image_urls else None
)
thread_session.commit()

View File

@ -0,0 +1,222 @@
"""
Image generation using fal.ai FLUX.1 schnell API
"""
import os
import re
import random
import logging
import requests
from typing import Optional, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
import fal_client
from src.generation.ai_client import AIClient, PromptManager
from src.database.repositories import ProjectRepository
logger = logging.getLogger(__name__)
def truncate_title(title: str, max_words: int = 4) -> str:
"""Truncate title to max_words and convert to UPPERCASE"""
words = title.split()[:max_words]
return " ".join(words).upper()
def slugify(text: str) -> str:
"""Convert text to URL-friendly slug"""
text = text.lower()
text = re.sub(r'[^a-z0-9]+', '-', text)
text = text.strip('-')
return text
class ImageGenerator:
"""Generate images using fal.ai API"""
def __init__(
self,
ai_client: AIClient,
prompt_manager: PromptManager,
project_repo: ProjectRepository
):
self.ai_client = ai_client
self.prompt_manager = prompt_manager
self.project_repo = project_repo
# fal_client library expects FAL_KEY, but we use FAL_API_KEY in our env
# Set both for compatibility
self.fal_key = os.getenv("FAL_API_KEY") or os.getenv("FAL_KEY")
if self.fal_key and not os.getenv("FAL_KEY"):
os.environ["FAL_KEY"] = self.fal_key
if not self.fal_key:
logger.warning("FAL_API_KEY not set, image generation will fail")
self.max_concurrent = 5
self.executor = ThreadPoolExecutor(max_workers=self.max_concurrent)
def get_theme_prompt(self, project_id: int) -> str:
"""Get or generate theme prompt for project"""
project = self.project_repo.get_by_id(project_id)
if not project:
raise ValueError(f"Project {project_id} not found")
if project.image_theme_prompt:
return project.image_theme_prompt
# Generate theme prompt using AI
entities_str = ", ".join(project.entities or [])
related_str = ", ".join(project.related_searches or [])
system_msg, user_prompt = self.prompt_manager.format_prompt(
"image_theme_generation",
main_keyword=project.main_keyword,
entities=entities_str,
related_searches=related_str
)
theme_prompt, _ = self.ai_client.generate_completion(
prompt=user_prompt,
system_message=system_msg,
max_tokens=200,
temperature=0.7
)
# Save to project
project.image_theme_prompt = theme_prompt.strip()
self.project_repo.session.commit()
logger.info(f"Generated theme prompt for project {project_id}")
return project.image_theme_prompt
def generate_hero_image(
self,
project_id: int,
title: str,
width: int = 1280,
height: int = 720
) -> Optional[bytes]:
"""Generate hero image with title text"""
if not self.fal_key:
logger.error("FAL_API_KEY not set")
return None
try:
theme = self.get_theme_prompt(project_id)
title_short = truncate_title(title, 4)
prompt = f"{theme} Text: '{title_short}' in clean simple uppercase letters, positioned in middle of image."
logger.info(f"Generating hero image with prompt: {prompt}")
result = fal_client.subscribe(
"fal-ai/flux-1/schnell",
arguments={
"prompt": prompt,
"image_size": {"width": width, "height": height},
"num_inference_steps": 4,
"guidance_scale": 3.5,
"output_format": "jpeg"
},
with_logs=True
)
logger.debug(f"API response keys: {result.keys() if result else 'None'}")
logger.debug(f"API response type: {type(result)}")
# Check different possible response structures
images = None
if result:
if "images" in result:
images = result["images"]
elif "data" in result and "images" in result["data"]:
images = result["data"]["images"]
elif isinstance(result, dict) and len(result) == 1 and "images" in list(result.values())[0]:
images = list(result.values())[0]["images"]
if images and len(images) > 0:
image_data = images[0]
image_url = image_data.get("url")
if not image_url:
logger.error(f"No URL in image response. Image data keys: {image_data.keys() if isinstance(image_data, dict) else 'not a dict'}")
return None
logger.info(f"Downloading hero image from URL: {image_url}")
response = requests.get(image_url, timeout=30)
response.raise_for_status()
return response.content
logger.error(f"No image returned from fal.ai. Response: {result}")
return None
except Exception as e:
logger.error(f"Failed to generate hero image: {e}", exc_info=True)
logger.error(f"Exception type: {type(e).__name__}")
if hasattr(e, 'response'):
logger.error(f"Response: {e.response}")
return None
def generate_content_image(
self,
project_id: int,
entity: str,
related_search: str,
width: int = 512,
height: int = 512
) -> Optional[bytes]:
"""Generate content image with entity and related search"""
if not self.fal_key:
logger.error("FAL_API_KEY not set")
return None
try:
theme = self.get_theme_prompt(project_id)
prompt = f"{theme} Focus on {entity} and {related_search}, professional illustration style."
logger.info(f"Generating content image with prompt: {prompt}")
result = fal_client.subscribe(
"fal-ai/flux-1/schnell",
arguments={
"prompt": prompt,
"image_size": {"width": width, "height": height},
"num_inference_steps": 4,
"guidance_scale": 3.5,
"output_format": "jpeg"
},
with_logs=True
)
logger.debug(f"API response keys: {result.keys() if result else 'None'}")
logger.debug(f"API response type: {type(result)}")
# Check different possible response structures
images = None
if result:
if "images" in result:
images = result["images"]
elif "data" in result and "images" in result["data"]:
images = result["data"]["images"]
elif isinstance(result, dict) and len(result) == 1 and "images" in list(result.values())[0]:
images = list(result.values())[0]["images"]
if images and len(images) > 0:
image_data = images[0]
image_url = image_data.get("url")
if not image_url:
logger.error(f"No URL in image response. Image data keys: {image_data.keys() if isinstance(image_data, dict) else 'not a dict'}")
return None
logger.info(f"Downloading content image from URL: {image_url}")
response = requests.get(image_url, timeout=30)
response.raise_for_status()
return response.content
logger.error(f"No image returned from fal.ai. Response: {result}")
return None
except Exception as e:
logger.error(f"Failed to generate content image: {e}", exc_info=True)
logger.error(f"Exception type: {type(e).__name__}")
if hasattr(e, 'response'):
logger.error(f"Response: {e.response}")
return None

View File

@ -0,0 +1,94 @@
"""
HTML image insertion logic
"""
import re
import random
from typing import List, Optional
from src.database.models import Project
def generate_alt_text(project: Project) -> str:
"""Generate alt text with 3 entities and 2 related searches"""
entities = project.entities or []
related_searches = project.related_searches or []
# Pick 3 random entities (or all if less than 3)
selected_entities = random.sample(entities, min(3, len(entities))) if entities else []
# Pick 2 random related searches (or all if less than 2)
selected_related = random.sample(related_searches, min(2, len(related_searches))) if related_searches else []
# Combine: entity1 related_search1 entity2 related_search2 entity3
parts = []
# Add entities and related searches in order: entity1, related1, entity2, related2, entity3
for i in range(max(len(selected_entities), len(selected_related))):
if i < len(selected_entities):
parts.append(selected_entities[i])
if i < len(selected_related):
parts.append(selected_related[i])
if len(parts) >= 5:
break
return " ".join(parts[:5]) if parts else project.main_keyword
def insert_hero_after_h1(html: str, hero_url: str, alt_text: str) -> str:
"""Insert hero image immediately after first H1 tag"""
# Find first <h1>...</h1>
pattern = r'(<h1[^>]*>.*?</h1>)'
match = re.search(pattern, html, re.IGNORECASE | re.DOTALL)
if match:
h1_tag = match.group(1)
img_tag = f'<img src="{hero_url}" alt="{alt_text}" />'
return html.replace(h1_tag, h1_tag + "\n" + img_tag, 1)
# If no H1 found, insert at beginning
img_tag = f'<img src="{hero_url}" alt="{alt_text}" />'
return img_tag + "\n" + html
def insert_content_images_after_h2s(html: str, image_urls: List[str], alt_texts: List[str]) -> str:
"""Insert content images after H2 sections, distributed evenly"""
if not image_urls:
return html
# Find all H2 tags
pattern = r'(<h2[^>]*>.*?</h2>)'
h2_matches = list(re.finditer(pattern, html, re.IGNORECASE | re.DOTALL))
if not h2_matches:
# No H2s, insert at end
img_tags = [f'<img src="{url}" alt="{alt}" />' for url, alt in zip(image_urls, alt_texts)]
return html + "\n" + "\n".join(img_tags)
# Distribute images across H2s
result = html
h2_positions = [(m.start(), m.end()) for m in h2_matches]
# Insert images after H2s, evenly distributed
images_per_h2 = len(image_urls) / len(h2_matches) if h2_matches else 0
inserted = 0
for i, (start, end) in enumerate(h2_positions):
if inserted >= len(image_urls):
break
# Calculate which images to insert after this H2
start_idx = int(i * images_per_h2)
end_idx = int((i + 1) * images_per_h2) if i < len(h2_positions) - 1 else len(image_urls)
if start_idx < len(image_urls):
h2_tag = html[start:end]
img_tags = []
for j in range(start_idx, min(end_idx, len(image_urls))):
img_tag = f'<img src="{image_urls[j]}" alt="{alt_texts[j] if j < len(alt_texts) else alt_texts[0]}" />'
img_tags.append(img_tag)
inserted += 1
if img_tags:
replacement = h2_tag + "\n" + "\n".join(img_tags)
result = result.replace(h2_tag, replacement, 1)
return result

View File

@ -0,0 +1,65 @@
"""
Image upload utilities for storage zones
"""
import logging
import requests
from typing import Optional
from src.deployment.bunny_storage import BunnyStorageClient
from src.database.models import SiteDeployment
from src.generation.url_generator import generate_public_url
logger = logging.getLogger(__name__)
def upload_image_to_storage(
storage_client: BunnyStorageClient,
site: SiteDeployment,
image_bytes: bytes,
file_path: str
) -> Optional[str]:
"""
Upload image to storage zone and return public URL
Args:
storage_client: BunnyStorageClient instance
site: SiteDeployment with zone info
image_bytes: Image file bytes
file_path: Path within storage zone (e.g., 'images/hero.jpg')
Returns:
Public URL if successful, None if failed
"""
try:
# Check if file exists first
base_url = storage_client._get_storage_url(site.storage_zone_region)
check_url = f"{base_url}/{site.storage_zone_name}/{file_path}"
headers = {"AccessKey": site.storage_zone_password}
check_response = requests.head(check_url, headers=headers, timeout=10)
if check_response.status_code == 200:
# File exists, return existing URL
logger.info(f"Image {file_path} already exists, using existing")
return generate_public_url(site, file_path)
# Upload image (binary data)
url = f"{base_url}/{site.storage_zone_name}/{file_path}"
headers = {
"AccessKey": site.storage_zone_password,
"Content-Type": "image/jpeg",
"accept": "application/json"
}
response = requests.put(url, data=image_bytes, headers=headers, timeout=60)
if response.status_code in [200, 201]:
logger.info(f"Uploaded image {file_path} to {site.storage_zone_name}")
return generate_public_url(site, file_path)
else:
logger.error(f"Failed to upload image {file_path}: {response.status_code} - {response.text}")
return None
except Exception as e:
logger.error(f"Error uploading image {file_path}: {e}", exc_info=True)
return None

View File

@ -67,6 +67,29 @@ class InterlinkingConfig:
see_also_max: int = 5
@dataclass
class HeroImageConfig:
"""Configuration for hero images"""
width: int = 1280
height: int = 720
@dataclass
class ContentImageConfig:
"""Configuration for content images"""
min_num_images: int = 0
max_num_images: int = 0
width: int = 512
height: int = 512
@dataclass
class ImageConfig:
"""Configuration for image generation"""
hero: Optional[HeroImageConfig] = None
content: Optional[ContentImageConfig] = None
@dataclass
class TierConfig:
"""Configuration for a specific tier"""
@ -79,6 +102,7 @@ class TierConfig:
max_h3_tags: int
anchor_text_config: Optional[AnchorTextConfig] = None
models: Optional[ModelConfig] = None
image_config: Optional[ImageConfig] = None
@dataclass
@ -344,6 +368,60 @@ class JobConfig:
content=models_data["content"]
)
# Parse image_config if present
image_config = None
if "image_config" in tier_data:
img_data = tier_data["image_config"]
if not isinstance(img_data, dict):
raise ValueError(f"'{tier_name}.image_config' must be an object")
hero_config = None
if "hero" in img_data:
hero_data = img_data["hero"]
if not isinstance(hero_data, dict):
raise ValueError(f"'{tier_name}.image_config.hero' must be an object")
hero_config = HeroImageConfig(
width=hero_data.get("width", 1280),
height=hero_data.get("height", 720)
)
else:
# Default hero config for all tiers
hero_config = HeroImageConfig()
content_config = None
if "content" in img_data:
content_data = img_data["content"]
if not isinstance(content_data, dict):
raise ValueError(f"'{tier_name}.image_config.content' must be an object")
min_imgs = content_data.get("min_num_images", 0)
max_imgs = content_data.get("max_num_images", 0)
# Defaults: T1 = 1-3, others = 0-0
if tier_name == "tier1" and min_imgs == 0 and max_imgs == 0:
min_imgs = 1
max_imgs = 3
content_config = ContentImageConfig(
min_num_images=min_imgs,
max_num_images=max_imgs,
width=content_data.get("width", 512),
height=content_data.get("height", 512)
)
else:
# Default content config based on tier
if tier_name == "tier1":
content_config = ContentImageConfig(min_num_images=1, max_num_images=3)
else:
content_config = ContentImageConfig(min_num_images=0, max_num_images=0)
image_config = ImageConfig(hero=hero_config, content=content_config)
else:
# Default image config if not specified
hero_config = HeroImageConfig()
if tier_name == "tier1":
content_config = ContentImageConfig(min_num_images=1, max_num_images=3)
else:
content_config = ContentImageConfig(min_num_images=0, max_num_images=0)
image_config = ImageConfig(hero=hero_config, content=content_config)
return TierConfig(
count=tier_data.get("count", 1),
min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]),
@ -353,7 +431,8 @@ class JobConfig:
min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]),
max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]),
anchor_text_config=anchor_text_config,
models=tier_models
models=tier_models,
image_config=image_config
)
def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig:
@ -379,6 +458,56 @@ class JobConfig:
raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array")
anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text)
# Parse image_config if present (same logic as _parse_tier)
image_config = None
if "image_config" in tier_data:
img_data = tier_data["image_config"]
if not isinstance(img_data, dict):
raise ValueError(f"'{tier_name}.image_config' must be an object")
hero_config = None
if "hero" in img_data:
hero_data = img_data["hero"]
if not isinstance(hero_data, dict):
raise ValueError(f"'{tier_name}.image_config.hero' must be an object")
hero_config = HeroImageConfig(
width=hero_data.get("width", 1280),
height=hero_data.get("height", 720)
)
else:
hero_config = HeroImageConfig()
content_config = None
if "content" in img_data:
content_data = img_data["content"]
if not isinstance(content_data, dict):
raise ValueError(f"'{tier_name}.image_config.content' must be an object")
min_imgs = content_data.get("min_num_images", 0)
max_imgs = content_data.get("max_num_images", 0)
if tier_name == "tier1" and min_imgs == 0 and max_imgs == 0:
min_imgs = 1
max_imgs = 3
content_config = ContentImageConfig(
min_num_images=min_imgs,
max_num_images=max_imgs,
width=content_data.get("width", 512),
height=content_data.get("height", 512)
)
else:
if tier_name == "tier1":
content_config = ContentImageConfig(min_num_images=1, max_num_images=3)
else:
content_config = ContentImageConfig(min_num_images=0, max_num_images=0)
image_config = ImageConfig(hero=hero_config, content=content_config)
else:
hero_config = HeroImageConfig()
if tier_name == "tier1":
content_config = ContentImageConfig(min_num_images=1, max_num_images=3)
else:
content_config = ContentImageConfig(min_num_images=0, max_num_images=0)
image_config = ImageConfig(hero=hero_config, content=content_config)
return TierConfig(
count=count,
min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]),
@ -387,7 +516,8 @@ class JobConfig:
max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]),
min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]),
max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]),
anchor_text_config=anchor_text_config
anchor_text_config=anchor_text_config,
image_config=image_config
)
def get_jobs(self) -> list[Job]:

View File

@ -0,0 +1,9 @@
h2_prompts = {
"general": "Clean, professional illustration related to {h2_topic}, modern business style, simple geometric shapes, corporate color palette, minimalist design, high-quality vector art style",
"technical": "Technical diagram or infographic about {h2_topic}, clean lines, professional schematic style, industrial design, blue and gray tones, modern technical illustration",
"process": "Step-by-step process visualization for {h2_topic}, clean flowchart style, professional arrows and connections, corporate color scheme, modern infographic design",
"benefits": "Professional icon-based illustration showing {h2_topic}, clean symbol design, business-friendly colors, modern flat design style, organized layout"
}