"""
Test script to verify image reinsertion after interlink injection
Tests the new flow:
1. Get existing articles (2 T1, 2 T2) from project 30
2. Simulate interlink injection (already done, just read current content)
3. Re-insert images using _reinsert_images logic
4. Apply templates
5. Save formatted HTML locally to verify images display
Usage:
uv run python scripts/test_image_reinsertion.py [project_id]
(project_id is optional and defaults to 30)
"""
import re
import sys
import traceback
from html import unescape
from pathlib import Path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))
from src.database.session import db_manager
from src.database.repositories import GeneratedContentRepository, ProjectRepository, SiteDeploymentRepository
from src.generation.image_injection import insert_hero_after_h1, insert_content_images_after_h2s, generate_alt_text
from src.templating.service import TemplateService
# Compiled once at import time instead of on every article in the loop.
_IMG_TAG_RE = re.compile(r"<img[^>]*>")
_ANY_TAG_RE = re.compile(r"<[^>]+>")


def _print_article_summary(article) -> None:
    """Print the stored image fields of *article* and how many ``<img`` tags its content holds."""
    has_hero = article.hero_image_url or "None"
    has_content = f"{len(article.content_images) if article.content_images else 0} images"
    # Content may be unset in the database; count against an empty string then.
    existing_imgs = (article.content or "").count("<img")
    print(f" - {article.title[:50]}...")
    print(f" Hero: {has_hero}")
    print(f" Content: {has_content}")
    print(f" Existing <img> tags in content: {existing_imgs}")


def _reinsert_images(article, project, html: str) -> str:
    """Return *html* with the article's stored hero/content images re-inserted.

    Existing ``<img>`` tags are removed first so that re-running the insertion
    does not duplicate images. When the article has neither a hero image URL
    nor content images, *html* is returned unchanged.
    """
    if not (article.hero_image_url or article.content_images):
        print(" No images to insert (hero_image_url and content_images both empty)")
        return html

    print(" Re-inserting images...")

    # Remove existing images first (to avoid duplicates).
    existing_count = html.count("<img")
    if existing_count > 0:
        print(f" Removing {existing_count} existing image(s)...")
        html = _IMG_TAG_RE.sub("", html)

    if article.hero_image_url:
        alt_text = generate_alt_text(project)
        html = insert_hero_after_h1(html, article.hero_image_url, alt_text)
        print(f" Hero image inserted: {article.hero_image_url}")
    else:
        print(" No hero image URL in database")

    if article.content_images:
        # One alt text per content image, each generated from the project.
        alt_texts = [generate_alt_text(project) for _ in article.content_images]
        html = insert_content_images_after_h2s(html, article.content_images, alt_texts)
        print(f" {len(article.content_images)} content images inserted")

    return html


def _build_meta_description(html: str, max_words: int = 25) -> str:
    """Return the first *max_words* words of *html*'s tag-stripped, unescaped text plus ``...``."""
    text = unescape(_ANY_TAG_RE.sub("", html))
    return " ".join(text.split()[:max_words]) + "..."


def _output_filename(article) -> str:
    """Build ``<tier>_<id>_<title>.html``, keeping only alphanumerics, spaces, ``-`` and ``_`` of the title."""
    safe_title = "".join(
        c for c in article.title if c.isalnum() or c in (" ", "-", "_")
    ).rstrip()[:50]
    return f"{article.tier}_{article.id}_{safe_title}.html"


def test_image_reinsertion(project_id: int = 30) -> None:
    """Test image reinsertion on existing articles.

    Loads the project, takes its first two "tier1" and first two "tier2"
    articles, re-inserts their stored images into the current content,
    applies the selected template and writes each result to
    ``test_output/`` so the pages can be opened in a browser.

    Args:
        project_id: ID of the project whose articles are processed.

    Returns:
        None. Progress and problems are reported on stdout. The function
        returns early when the project is missing or has fewer than two
        articles in either tier. A failure while templating or saving one
        article is printed with its traceback and the loop moves on to the
        next article.
    """
    session = db_manager.get_session()
    try:
        content_repo = GeneratedContentRepository(session)
        project_repo = ProjectRepository(session)
        site_repo = SiteDeploymentRepository(session)

        project = project_repo.get_by_id(project_id)
        if not project:
            print(f"Project {project_id} not found")
            return

        # Get 2 T1 and 2 T2 articles
        t1_articles = content_repo.get_by_project_and_tier(project_id, "tier1", require_site=False)
        t2_articles = content_repo.get_by_project_and_tier(project_id, "tier2", require_site=False)
        if len(t1_articles) < 2:
            print(f"Not enough T1 articles (found {len(t1_articles)}, need 2)")
            return
        if len(t2_articles) < 2:
            print(f"Not enough T2 articles (found {len(t2_articles)}, need 2)")
            return

        test_articles = t1_articles[:2] + t2_articles[:2]
        print(f"\nTesting image reinsertion for project {project_id}: {project.name}")
        print(f"Selected {len(test_articles)} articles:")
        for article in test_articles:
            _print_article_summary(article)

        # Create output directory (relative to the current working directory)
        output_dir = Path("test_output")
        output_dir.mkdir(exist_ok=True)

        # Initialize template service
        template_service = TemplateService()

        for article in test_articles:
            print(f"\nProcessing: {article.title[:50]}...")

            # Step 1: Get current content (after interlink injection)
            html = article.content or ""
            print(f" Content length: {len(html)} chars")

            # Step 2: Re-insert images (simulating _reinsert_images)
            html = _reinsert_images(article, project, html)

            # Step 3: Apply template
            print(" Applying template...")
            try:
                # Get template name from site or use default
                template_name = template_service.select_template_for_content(
                    site_deployment_id=article.site_deployment_id,
                    site_deployment_repo=site_repo,
                )
                formatted_html = template_service.format_content(
                    content=html,
                    title=article.title,
                    meta_description=_build_meta_description(html),
                    template_name=template_name,
                    canonical_url=article.deployed_url,
                )
                print(f" Template '{template_name}' applied")

                # Step 4: Save to file
                filepath = output_dir / _output_filename(article)
                filepath.write_text(formatted_html, encoding="utf-8")
                print(f" Saved to: {filepath}")

                # Check if images are in the HTML: count occurrences of each stored URL
                hero_count = formatted_html.count(article.hero_image_url) if article.hero_image_url else 0
                content_count = sum(formatted_html.count(url) for url in (article.content_images or []))
                print(f" Image check: Hero={hero_count}, Content={content_count}")
            except Exception as e:
                # Per-article boundary: report the failure and keep processing the rest.
                print(f" ERROR applying template: {e}")
                traceback.print_exc()

        print(f"\n✓ Test complete! Check files in {output_dir}/")
        print(" Open the HTML files in a browser to verify images display correctly.")
    finally:
        # Always release the DB session, including on the early returns above.
        session.close()
if __name__ == "__main__":
    # The first command-line argument, when given, overrides the default project.
    project_id = 30
    cli_args = sys.argv[1:]
    if cli_args:
        try:
            project_id = int(cli_args[0])
        except ValueError:
            # Non-numeric argument: report it and keep the default.
            print(f"Invalid project_id: {sys.argv[1]}. Using default: 30")
    test_image_reinsertion(project_id)