From 5eef4fe507e13ad09c1c5d8a5605348f7e9205ca Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Mon, 27 Oct 2025 14:37:47 -0500 Subject: [PATCH] Fixed concurrency and model changes at job level - version 1.1.0 --- scripts/delete_generated_content.py | 66 +++++++++++++++++++ src/generation/ai_client.py | 45 ++++++++++--- src/generation/batch_processor.py | 28 +++++++- src/generation/job_config.py | 18 ++++- .../prompts/content_generation.json | 2 +- src/generation/service.py | 10 ++- 6 files changed, 154 insertions(+), 15 deletions(-) create mode 100644 scripts/delete_generated_content.py diff --git a/scripts/delete_generated_content.py b/scripts/delete_generated_content.py new file mode 100644 index 0000000..5800487 --- /dev/null +++ b/scripts/delete_generated_content.py @@ -0,0 +1,66 @@ +""" +Delete all generated content for a specific project +""" +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import click +from src.database.session import db_manager +from src.database.models import GeneratedContent + +def delete_project_content(project_id: int, confirm: bool = True): + """ + Delete all generated content for a project + + Args: + project_id: Project ID to delete content for + confirm: If True, ask for confirmation before deleting + """ + db_manager.initialize() + session = db_manager.get_session() + + try: + records = session.query(GeneratedContent).filter( + GeneratedContent.project_id == project_id + ).all() + + count = len(records) + + if count == 0: + click.echo(f"No generated content found for project {project_id}") + return + + click.echo(f"Found {count} generated content records for project {project_id}:") + for record in records: + click.echo(f" - ID {record.id}: {record.tier} - {record.title[:60]}...") + + if confirm: + response = click.confirm(f"\nDelete all {count} records?", default=False) + if not response: + click.echo("Cancelled.") + return + + for record in records: + session.delete(record) + + 
session.commit() + click.echo(f"Successfully deleted {count} records for project {project_id}") + + except Exception as e: + session.rollback() + click.echo(f"Error: {e}", err=True) + raise + finally: + session.close() + +@click.command() +@click.argument('project_id', type=int) +@click.option('--yes', '-y', is_flag=True, help='Skip confirmation prompt') +def main(project_id: int, yes: bool): + """Delete all generated content for PROJECT_ID""" + delete_project_content(project_id, confirm=not yes) + +if __name__ == "__main__": + main() + diff --git a/src/generation/ai_client.py b/src/generation/ai_client.py index 76d8b2d..60ec5af 100644 --- a/src/generation/ai_client.py +++ b/src/generation/ai_client.py @@ -6,7 +6,7 @@ import time import json from pathlib import Path from typing import Optional, Dict, Any -from openai import OpenAI, RateLimitError, APIError +from openai import OpenAI, RateLimitError, APIError, APIConnectionError, APITimeoutError from src.core.config import get_config AVAILABLE_MODELS = { @@ -24,7 +24,14 @@ class AIClient: model: str, base_url: str = "https://openrouter.ai/api/v1" ): - self.client = OpenAI(api_key=api_key, base_url=base_url) + self.client = OpenAI( + api_key=api_key, + base_url=base_url, + default_headers={ + "HTTP-Referer": "https://github.com/yourusername/Big-Link-Man", + "X-Title": "Big-Link-Man" + } + ) if model in AVAILABLE_MODELS: self.model = AVAILABLE_MODELS[model] @@ -72,32 +79,54 @@ class AIClient: kwargs["response_format"] = {"type": "json_object"} retries = 3 + wait_times = [10, 20] + for attempt in range(retries): try: response = self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content or "" - # Debug: print first 200 chars if json_mode if json_mode: print(f"[DEBUG] AI Response (first 200 chars): {content[:200]}") return content except RateLimitError as e: if attempt < retries - 1: - wait_time = 2 ** attempt - print(f"Rate limit hit. 
Retrying in {wait_time}s...") + wait_time = wait_times[attempt] + print(f"[API] Rate limit hit. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})") time.sleep(wait_time) else: + print(f"[API] Rate limit exceeded after {retries} attempts") + raise + + except (APIConnectionError, APITimeoutError) as e: + if attempt < retries - 1: + wait_time = wait_times[attempt] + print(f"[API] Connection/timeout error. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})") + time.sleep(wait_time) + else: + print(f"[API] Connection failed after {retries} attempts") + raise + + except json.JSONDecodeError as e: + if attempt < retries - 1: + wait_time = wait_times[attempt] + print(f"[API] Invalid JSON response (likely API error page). Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})") + time.sleep(wait_time) + else: + print(f"[API] Failed to get valid response after {retries} attempts") raise except APIError as e: - if attempt < retries - 1 and "network" in str(e).lower(): - wait_time = 2 ** attempt - print(f"Network error. Retrying in {wait_time}s...") + if attempt < retries - 1: + wait_time = wait_times[attempt] + print(f"[API] API error: {str(e)[:100]}. Retrying in {wait_time}s... 
(attempt {attempt + 1}/{retries})") time.sleep(wait_time) else: + print(f"[API] API error after {retries} attempts: {str(e)[:200]}") raise except Exception as e: + print(f"[API] Unexpected error: {type(e).__name__}: {str(e)[:200]}") raise return "" diff --git a/src/generation/batch_processor.py b/src/generation/batch_processor.py index aab9e24..f4ed624 100644 --- a/src/generation/batch_processor.py +++ b/src/generation/batch_processor.py @@ -5,6 +5,7 @@ Batch processor for content generation jobs from typing import Dict, Any, Optional, List import click import os +import time from pathlib import Path from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed @@ -45,7 +46,11 @@ class BatchProcessor: "total_articles": 0, "generated_articles": 0, "augmented_articles": 0, - "failed_articles": 0 + "failed_articles": 0, + "tier1_time": 0.0, + "tier2_time": 0.0, + "tier3_time": 0.0, + "total_time": 0.0 } def process_job( @@ -64,6 +69,8 @@ class BatchProcessor: continue_on_error: If True, continue on article generation failure auto_deploy: If True, deploy to cloud storage after generation (default: True) """ + start_time = time.time() + job_config = JobConfig(job_file_path) jobs = job_config.get_jobs() @@ -78,6 +85,7 @@ class BatchProcessor: if not continue_on_error: raise + self.stats["total_time"] = time.time() - start_time self._print_summary() def _generate_all_titles_for_tier( @@ -175,6 +183,8 @@ class BatchProcessor: raise for tier_name, tier_config in job.tiers.items(): + tier_start_time = time.time() + self._process_tier( job.project_id, tier_name, @@ -184,6 +194,10 @@ class BatchProcessor: debug, continue_on_error ) + + tier_elapsed = time.time() - tier_start_time + with self.stats_lock: + self.stats[f"{tier_name}_time"] += tier_elapsed if auto_deploy: try: @@ -210,7 +224,7 @@ class BatchProcessor: project = self.project_repo.get_by_id(project_id) keyword = project.main_keyword - models = job.models if job.models else None + 
models = tier_config.models if tier_config.models else (job.models if job.models else None) click.echo(f"\n[{tier_name}] Generating {tier_config.count} titles in batches...") titles_file = self._generate_all_titles_for_tier( @@ -718,4 +732,14 @@ class BatchProcessor: click.echo(f"Articles generated: {self.stats['generated_articles']}/{self.stats['total_articles']}") click.echo(f"Augmented: {self.stats['augmented_articles']}") click.echo(f"Failed: {self.stats['failed_articles']}") + click.echo("") + click.echo("TIMING") + click.echo("-" * 60) + if self.stats['tier1_time'] > 0: + click.echo(f"Tier 1 Time: {self.stats['tier1_time']:.1f}s ({self.stats['tier1_time']/60:.1f}m)") + if self.stats['tier2_time'] > 0: + click.echo(f"Tier 2 Time: {self.stats['tier2_time']:.1f}s ({self.stats['tier2_time']/60:.1f}m)") + if self.stats['tier3_time'] > 0: + click.echo(f"Tier 3 Time: {self.stats['tier3_time']:.1f}s ({self.stats['tier3_time']/60:.1f}m)") + click.echo(f"Total Time: {self.stats['total_time']:.1f}s ({self.stats['total_time']/60:.1f}m)") click.echo("="*60) \ No newline at end of file diff --git a/src/generation/job_config.py b/src/generation/job_config.py index 11a4e0a..c082b30 100644 --- a/src/generation/job_config.py +++ b/src/generation/job_config.py @@ -78,6 +78,7 @@ class TierConfig: min_h3_tags: int max_h3_tags: int anchor_text_config: Optional[AnchorTextConfig] = None + models: Optional[ModelConfig] = None @dataclass @@ -329,6 +330,20 @@ class JobConfig: raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array") anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text) + # Parse tier-level models if present + tier_models = None + if "models" in tier_data: + models_data = tier_data["models"] + if not isinstance(models_data, dict): + raise ValueError(f"'{tier_name}.models' must be an object") + if "title" not in models_data or "outline" not in models_data or "content" not in models_data: + raise 
ValueError(f"'{tier_name}.models' must have 'title', 'outline', and 'content' fields") + tier_models = ModelConfig( + title=models_data["title"], + outline=models_data["outline"], + content=models_data["content"] + ) + return TierConfig( count=tier_data.get("count", 1), min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]), @@ -337,7 +352,8 @@ class JobConfig: max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]), min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]), max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]), - anchor_text_config=anchor_text_config + anchor_text_config=anchor_text_config, + models=tier_models ) def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig: diff --git a/src/generation/prompts/content_generation.json b/src/generation/prompts/content_generation.json index ca96093..6a1cdf9 100644 --- a/src/generation/prompts/content_generation.json +++ b/src/generation/prompts/content_generation.json @@ -1,5 +1,5 @@ { "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.", - "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget token count range: {min_word_count} to {max_word_count} tokens.\n\nReturn as an HTML fragment with
 &lt;h1&gt;, &lt;h2&gt;, and &lt;h3&gt; tags. Do NOT include &lt;html&gt;, &lt;head&gt;, &lt;body&gt;, or &lt;!DOCTYPE&gt; tags. Start directly with the first &lt;h1&gt;
heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content." + "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with
 &lt;h1&gt;, &lt;h2&gt;, and &lt;h3&gt; tags. Do NOT include &lt;html&gt;, &lt;head&gt;, &lt;body&gt;, or &lt;!DOCTYPE&gt; tags. Start directly with the first &lt;h1&gt;
heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content." } diff --git a/src/generation/service.py b/src/generation/service.py index 86c914c..c5dbb01 100644 --- a/src/generation/service.py +++ b/src/generation/service.py @@ -293,8 +293,11 @@ class ContentGenerator: related_str = ", ".join(project.related_searches or []) outline_str = json.dumps(outline, indent=2) - compensated_min = min_word_count + 200 - compensated_max = max_word_count + 200 + compensated_min = min_word_count + 100 + compensated_max = max_word_count + 100 + + h3_count = sum(len(section.get("h3", [])) for section in outline.get("outline", [])) + words_per_section = int(compensated_min / h3_count) if h3_count > 0 else 100 system_msg, user_prompt = self.prompt_manager.format_prompt( "content_generation", @@ -304,7 +307,8 @@ class ContentGenerator: entities=entities_str, related_searches=related_str, min_word_count=compensated_min, - max_word_count=compensated_max + max_word_count=compensated_max, + words_per_section=words_per_section ) content = self.ai_client.generate_completion(