""" Test script to verify image reinsertion after interlink injection Tests the new flow: 1. Get existing articles (2 T1, 2 T2) from project 30 2. Simulate interlink injection (already done, just read current content) 3. Re-insert images using _reinsert_images logic 4. Apply templates 5. Save formatted HTML locally to verify images display Usage: uv run python scripts/test_image_reinsertion.py """ import sys from pathlib import Path project_root = Path(__file__).parent.parent sys.path.insert(0, str(project_root)) from src.database.session import db_manager from src.database.repositories import GeneratedContentRepository, ProjectRepository, SiteDeploymentRepository from src.generation.image_injection import insert_hero_after_h1, insert_content_images_after_h2s, generate_alt_text from src.templating.service import TemplateService def test_image_reinsertion(project_id: int = 30): """Test image reinsertion on existing articles""" session = db_manager.get_session() try: content_repo = GeneratedContentRepository(session) project_repo = ProjectRepository(session) site_repo = SiteDeploymentRepository(session) project = project_repo.get_by_id(project_id) if not project: print(f"Project {project_id} not found") return # Get 2 T1 and 2 T2 articles t1_articles = content_repo.get_by_project_and_tier(project_id, "tier1", require_site=False) t2_articles = content_repo.get_by_project_and_tier(project_id, "tier2", require_site=False) if len(t1_articles) < 2: print(f"Not enough T1 articles (found {len(t1_articles)}, need 2)") return if len(t2_articles) < 2: print(f"Not enough T2 articles (found {len(t2_articles)}, need 2)") return test_articles = t1_articles[:2] + t2_articles[:2] print(f"\nTesting image reinsertion for project {project_id}: {project.name}") print(f"Selected {len(test_articles)} articles:") for article in test_articles: has_hero = article.hero_image_url or "None" has_content = f"{len(article.content_images) if article.content_images else 0} images" existing_imgs = article.content.count(" tags in content: {existing_imgs}") # Create output directory output_dir = Path("test_output") output_dir.mkdir(exist_ok=True) # Initialize template service template_service = TemplateService() # Process each article for article in test_articles: print(f"\nProcessing: {article.title[:50]}...") # Step 1: Get current content (after interlink injection) html = article.content print(f" Content length: {len(html)} chars") # Step 2: Re-insert images (simulating _reinsert_images) if article.hero_image_url or article.content_images: print(f" Re-inserting images...") # Remove existing images first (to avoid duplicates) import re existing_count = html.count(" 0: print(f" Removing {existing_count} existing image(s)...") html = re.sub(r']*>', '', html) # Insert hero image if exists if article.hero_image_url: alt_text = generate_alt_text(project) html = insert_hero_after_h1(html, article.hero_image_url, alt_text) print(f" Hero image inserted: {article.hero_image_url}") else: print(f" No hero image URL in database") # Insert content images if exist if article.content_images: alt_texts = [generate_alt_text(project) for _ in article.content_images] html = insert_content_images_after_h2s(html, article.content_images, alt_texts) print(f" {len(article.content_images)} content images inserted") else: print(f" No images to insert (hero_image_url and content_images both empty)") # Step 3: Apply template print(f" Applying template...") try: # Get template name from site or use default template_name = template_service.select_template_for_content( site_deployment_id=article.site_deployment_id, site_deployment_repo=site_repo ) # Generate meta description import re from html import unescape text = re.sub(r'<[^>]+>', '', html) text = unescape(text) words = text.split()[:25] meta_description = ' '.join(words) + '...' # Format content with template formatted_html = template_service.format_content( content=html, title=article.title, meta_description=meta_description, template_name=template_name, canonical_url=article.deployed_url ) print(f" Template '{template_name}' applied") # Step 4: Save to file safe_title = "".join(c for c in article.title if c.isalnum() or c in (' ', '-', '_')).rstrip()[:50] filename = f"{article.tier}_{article.id}_{safe_title}.html" filepath = output_dir / filename with open(filepath, 'w', encoding='utf-8') as f: f.write(formatted_html) print(f" Saved to: {filepath}") # Check if images are in the HTML hero_count = formatted_html.count(article.hero_image_url) if article.hero_image_url else 0 content_count = sum(formatted_html.count(url) for url in (article.content_images or [])) print(f" Image check: Hero={hero_count}, Content={content_count}") except Exception as e: print(f" ERROR applying template: {e}") import traceback traceback.print_exc() print(f"\n✓ Test complete! Check files in {output_dir}/") print(f" Open the HTML files in a browser to verify images display correctly.") finally: session.close() if __name__ == "__main__": project_id = 30 if len(sys.argv) > 1: try: project_id = int(sys.argv[1]) except ValueError: print(f"Invalid project_id: {sys.argv[1]}. Using default: 30") test_image_reinsertion(project_id)