""" Content generation service with three-stage pipeline """ import re import json from html import unescape from pathlib import Path from datetime import datetime from typing import Optional, Tuple from src.generation.ai_client import AIClient, PromptManager from src.database.repositories import ProjectRepository, GeneratedContentRepository, SiteDeploymentRepository from src.templating.service import TemplateService class ContentGenerator: """Main service for generating content through AI pipeline""" def __init__( self, ai_client: AIClient, prompt_manager: PromptManager, project_repo: ProjectRepository, content_repo: GeneratedContentRepository, template_service: Optional[TemplateService] = None, site_deployment_repo: Optional[SiteDeploymentRepository] = None ): self.ai_client = ai_client self.prompt_manager = prompt_manager self.project_repo = project_repo self.content_repo = content_repo self.template_service = template_service or TemplateService(content_repo) self.site_deployment_repo = site_deployment_repo def generate_title(self, project_id: int, debug: bool = False, model: Optional[str] = None) -> str: """ Generate SEO-optimized title Args: project_id: Project ID to generate title for debug: If True, save response to debug_output/ model: Optional model override for this generation stage Returns: Generated title string """ project = self.project_repo.get_by_id(project_id) if not project: raise ValueError(f"Project {project_id} not found") entities_str = ", ".join(project.entities or []) related_str = ", ".join(project.related_searches or []) system_msg, user_prompt = self.prompt_manager.format_prompt( "title_generation", keyword=project.main_keyword, entities=entities_str, related_searches=related_str ) title = self.ai_client.generate_completion( prompt=user_prompt, system_message=system_msg, max_tokens=100, temperature=0.7, override_model=model ) title = title.strip().strip('"').strip("'") if debug: self._save_debug_output( project_id, "title", title, "txt" ) return title def generate_outline( self, project_id: int, title: str, min_h2: int, max_h2: int, min_h3: int, max_h3: int, debug: bool = False, model: Optional[str] = None ) -> dict: """ Generate article outline in JSON format Args: project_id: Project ID title: Article title min_h2: Minimum H2 headings max_h2: Maximum H2 headings min_h3: Minimum H3 subheadings total max_h3: Maximum H3 subheadings total debug: If True, save response to debug_output/ model: Optional model override for this generation stage Returns: Outline dictionary: {"outline": [{"h2": "...", "h3": ["...", "..."]}]} Raises: ValueError: If outline doesn't meet minimum requirements """ project = self.project_repo.get_by_id(project_id) if not project: raise ValueError(f"Project {project_id} not found") entities_str = ", ".join(project.entities or []) related_str = ", ".join(project.related_searches or []) system_msg, user_prompt = self.prompt_manager.format_prompt( "outline_generation", title=title, keyword=project.main_keyword, min_h2=min_h2, max_h2=max_h2, min_h3=min_h3, max_h3=max_h3, entities=entities_str, related_searches=related_str ) outline_json = self.ai_client.generate_completion( prompt=user_prompt, system_message=system_msg, max_tokens=2000, temperature=0.7, json_mode=True, override_model=model ) print(f"[DEBUG] Raw outline response: {outline_json}") # Save raw response immediately if debug: self._save_debug_output(project_id, "outline_raw", outline_json, "txt") print(f"[DEBUG] Raw outline response: {outline_json}") try: outline = json.loads(outline_json) except json.JSONDecodeError as e: if debug: self._save_debug_output(project_id, "outline_error", outline_json, "txt") raise ValueError(f"Failed to parse outline JSON: {e}\nResponse: {outline_json[:500]}") if "outline" not in outline: if debug: self._save_debug_output(project_id, "outline_invalid", json.dumps(outline, indent=2), "json") raise ValueError(f"Outline missing 'outline' key. Got keys: {list(outline.keys())}\nContent: {outline}") h2_count = len(outline["outline"]) h3_count = sum(len(section.get("h3", [])) for section in outline["outline"]) if h2_count < min_h2: raise ValueError(f"Outline has {h2_count} H2s, minimum is {min_h2}") if h3_count < min_h3: raise ValueError(f"Outline has {h3_count} H3s, minimum is {min_h3}") if debug: self._save_debug_output( project_id, "outline", json.dumps(outline, indent=2), "json" ) return outline def generate_content( self, project_id: int, title: str, outline: dict, min_word_count: int, max_word_count: int, debug: bool = False, model: Optional[str] = None ) -> str: """ Generate full article HTML fragment Args: project_id: Project ID title: Article title outline: Article outline dict min_word_count: Minimum word count for guidance max_word_count: Maximum word count for guidance debug: If True, save response to debug_output/ model: Optional model override for this generation stage Returns: HTML string with
tags """ project = self.project_repo.get_by_id(project_id) if not project: raise ValueError(f"Project {project_id} not found") entities_str = ", ".join(project.entities or []) related_str = ", ".join(project.related_searches or []) outline_str = json.dumps(outline, indent=2) system_msg, user_prompt = self.prompt_manager.format_prompt( "content_generation", title=title, outline=outline_str, keyword=project.main_keyword, entities=entities_str, related_searches=related_str, min_word_count=min_word_count, max_word_count=max_word_count ) content = self.ai_client.generate_completion( prompt=user_prompt, system_message=system_msg, max_tokens=8000, temperature=0.7, override_model=model ) content = content.strip() if debug: self._save_debug_output( project_id, "content", content, "html" ) return content def validate_word_count(self, content: str, min_words: int, max_words: int) -> Tuple[bool, int]: """ Validate content word count Args: content: HTML content string min_words: Minimum word count max_words: Maximum word count Returns: Tuple of (is_valid, actual_count) """ word_count = self.count_words(content) is_valid = min_words <= word_count <= max_words return is_valid, word_count def count_words(self, html_content: str) -> int: """ Count words in HTML content Args: html_content: HTML string Returns: Number of words """ text = re.sub(r'<[^>]+>', '', html_content) text = unescape(text) words = text.split() return len(words) def augment_content( self, content: str, target_word_count: int, debug: bool = False, project_id: Optional[int] = None, model: Optional[str] = None ) -> str: """ Expand article content to meet minimum word count Args: content: Current HTML content target_word_count: Target word count debug: If True, save response to debug_output/ project_id: Optional project ID for debug output model: Optional model override for this generation stage Returns: Expanded HTML content """ system_msg, user_prompt = self.prompt_manager.format_prompt( "content_augmentation", content=content, target_word_count=target_word_count ) augmented = self.ai_client.generate_completion( prompt=user_prompt, system_message=system_msg, max_tokens=8000, temperature=0.7, override_model=model ) augmented = augmented.strip() if debug and project_id: self._save_debug_output( project_id, "augmented", augmented, "html" ) return augmented def apply_template( self, content_id: int, meta_description: Optional[str] = None ) -> bool: """ Apply HTML template to generated content and save to database Args: content_id: GeneratedContent ID to format meta_description: Optional meta description (defaults to truncated content) Returns: True if successful, False otherwise """ try: content_record = self.content_repo.get_by_id(content_id) if not content_record: print(f"Warning: Content {content_id} not found") return False if not meta_description: text = re.sub(r'<[^>]+>', '', content_record.content) text = unescape(text) words = text.split()[:25] meta_description = ' '.join(words) + '...' template_name = self.template_service.select_template_for_content( site_deployment_id=content_record.site_deployment_id, site_deployment_repo=self.site_deployment_repo ) formatted_html = self.template_service.format_content( content=content_record.content, title=content_record.title, meta_description=meta_description, template_name=template_name ) content_record.formatted_html = formatted_html content_record.template_used = template_name self.content_repo.update(content_record) print(f"Applied template '{template_name}' to content {content_id}") return True except Exception as e: print(f"Error applying template to content {content_id}: {e}") return False def _save_debug_output( self, project_id: int, stage: str, content: str, extension: str, tier: Optional[str] = None, article_num: Optional[int] = None ): """Save debug output to file""" debug_dir = Path("debug_output") debug_dir.mkdir(exist_ok=True) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") tier_part = f"_tier{tier}" if tier else "" article_part = f"_article{article_num}" if article_num else "" filename = f"{stage}_project{project_id}{tier_part}{article_part}_{timestamp}.{extension}" filepath = debug_dir / filename with open(filepath, 'w', encoding='utf-8') as f: f.write(content)