From 5eef4fe507e13ad09c1c5d8a5605348f7e9205ca Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Mon, 27 Oct 2025 14:37:47 -0500 Subject: [PATCH] Fixed concurrency and model changes at job level - version 1.1.0 --- scripts/delete_generated_content.py | 66 +++++++++++++++++++ src/generation/ai_client.py | 45 ++++++++++--- src/generation/batch_processor.py | 28 +++++++- src/generation/job_config.py | 18 ++++- .../prompts/content_generation.json | 2 +- src/generation/service.py | 10 ++- 6 files changed, 154 insertions(+), 15 deletions(-) create mode 100644 scripts/delete_generated_content.py diff --git a/scripts/delete_generated_content.py b/scripts/delete_generated_content.py new file mode 100644 index 0000000..5800487 --- /dev/null +++ b/scripts/delete_generated_content.py @@ -0,0 +1,66 @@ +""" +Delete all generated content for a specific project +""" +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +import click +from src.database.session import db_manager +from src.database.models import GeneratedContent + +def delete_project_content(project_id: int, confirm: bool = True): + """ + Delete all generated content for a project + + Args: + project_id: Project ID to delete content for + confirm: If True, ask for confirmation before deleting + """ + db_manager.initialize() + session = db_manager.get_session() + + try: + records = session.query(GeneratedContent).filter( + GeneratedContent.project_id == project_id + ).all() + + count = len(records) + + if count == 0: + click.echo(f"No generated content found for project {project_id}") + return + + click.echo(f"Found {count} generated content records for project {project_id}:") + for record in records: + click.echo(f" - ID {record.id}: {record.tier} - {record.title[:60]}...") + + if confirm: + response = click.confirm(f"\nDelete all {count} records?", default=False) + if not response: + click.echo("Cancelled.") + return + + for record in records: + session.delete(record) + + 
session.commit() + click.echo(f"Successfully deleted {count} records for project {project_id}") + + except Exception as e: + session.rollback() + click.echo(f"Error: {e}", err=True) + raise + finally: + session.close() + +@click.command() +@click.argument('project_id', type=int) +@click.option('--yes', '-y', is_flag=True, help='Skip confirmation prompt') +def main(project_id: int, yes: bool): + """Delete all generated content for PROJECT_ID""" + delete_project_content(project_id, confirm=not yes) + +if __name__ == "__main__": + main() + diff --git a/src/generation/ai_client.py b/src/generation/ai_client.py index 76d8b2d..60ec5af 100644 --- a/src/generation/ai_client.py +++ b/src/generation/ai_client.py @@ -6,7 +6,7 @@ import time import json from pathlib import Path from typing import Optional, Dict, Any -from openai import OpenAI, RateLimitError, APIError +from openai import OpenAI, RateLimitError, APIError, APIConnectionError, APITimeoutError from src.core.config import get_config AVAILABLE_MODELS = { @@ -24,7 +24,14 @@ class AIClient: model: str, base_url: str = "https://openrouter.ai/api/v1" ): - self.client = OpenAI(api_key=api_key, base_url=base_url) + self.client = OpenAI( + api_key=api_key, + base_url=base_url, + default_headers={ + "HTTP-Referer": "https://github.com/yourusername/Big-Link-Man", + "X-Title": "Big-Link-Man" + } + ) if model in AVAILABLE_MODELS: self.model = AVAILABLE_MODELS[model] @@ -72,32 +79,54 @@ class AIClient: kwargs["response_format"] = {"type": "json_object"} retries = 3 + wait_times = [10, 20] + for attempt in range(retries): try: response = self.client.chat.completions.create(**kwargs) content = response.choices[0].message.content or "" - # Debug: print first 200 chars if json_mode if json_mode: print(f"[DEBUG] AI Response (first 200 chars): {content[:200]}") return content except RateLimitError as e: if attempt < retries - 1: - wait_time = 2 ** attempt - print(f"Rate limit hit. 
Retrying in {wait_time}s...") + wait_time = wait_times[attempt] + print(f"[API] Rate limit hit. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})") time.sleep(wait_time) else: + print(f"[API] Rate limit exceeded after {retries} attempts") + raise + + except (APIConnectionError, APITimeoutError) as e: + if attempt < retries - 1: + wait_time = wait_times[attempt] + print(f"[API] Connection/timeout error. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})") + time.sleep(wait_time) + else: + print(f"[API] Connection failed after {retries} attempts") + raise + + except json.JSONDecodeError as e: + if attempt < retries - 1: + wait_time = wait_times[attempt] + print(f"[API] Invalid JSON response (likely API error page). Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})") + time.sleep(wait_time) + else: + print(f"[API] Failed to get valid response after {retries} attempts") raise except APIError as e: - if attempt < retries - 1 and "network" in str(e).lower(): - wait_time = 2 ** attempt - print(f"Network error. Retrying in {wait_time}s...") + if attempt < retries - 1: + wait_time = wait_times[attempt] + print(f"[API] API error: {str(e)[:100]}. Retrying in {wait_time}s... 
(attempt {attempt + 1}/{retries})") time.sleep(wait_time) else: + print(f"[API] API error after {retries} attempts: {str(e)[:200]}") raise except Exception as e: + print(f"[API] Unexpected error: {type(e).__name__}: {str(e)[:200]}") raise return "" diff --git a/src/generation/batch_processor.py b/src/generation/batch_processor.py index aab9e24..f4ed624 100644 --- a/src/generation/batch_processor.py +++ b/src/generation/batch_processor.py @@ -5,6 +5,7 @@ Batch processor for content generation jobs from typing import Dict, Any, Optional, List import click import os +import time from pathlib import Path from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed @@ -45,7 +46,11 @@ class BatchProcessor: "total_articles": 0, "generated_articles": 0, "augmented_articles": 0, - "failed_articles": 0 + "failed_articles": 0, + "tier1_time": 0.0, + "tier2_time": 0.0, + "tier3_time": 0.0, + "total_time": 0.0 } def process_job( @@ -64,6 +69,8 @@ class BatchProcessor: continue_on_error: If True, continue on article generation failure auto_deploy: If True, deploy to cloud storage after generation (default: True) """ + start_time = time.time() + job_config = JobConfig(job_file_path) jobs = job_config.get_jobs() @@ -78,6 +85,7 @@ class BatchProcessor: if not continue_on_error: raise + self.stats["total_time"] = time.time() - start_time self._print_summary() def _generate_all_titles_for_tier( @@ -175,6 +183,8 @@ class BatchProcessor: raise for tier_name, tier_config in job.tiers.items(): + tier_start_time = time.time() + self._process_tier( job.project_id, tier_name, @@ -184,6 +194,10 @@ class BatchProcessor: debug, continue_on_error ) + + tier_elapsed = time.time() - tier_start_time + with self.stats_lock: + self.stats[f"{tier_name}_time"] += tier_elapsed if auto_deploy: try: @@ -210,7 +224,7 @@ class BatchProcessor: project = self.project_repo.get_by_id(project_id) keyword = project.main_keyword - models = job.models if job.models else None + 
models = tier_config.models if tier_config.models else (job.models if job.models else None) click.echo(f"\n[{tier_name}] Generating {tier_config.count} titles in batches...") titles_file = self._generate_all_titles_for_tier( @@ -718,4 +732,14 @@ class BatchProcessor: click.echo(f"Articles generated: {self.stats['generated_articles']}/{self.stats['total_articles']}") click.echo(f"Augmented: {self.stats['augmented_articles']}") click.echo(f"Failed: {self.stats['failed_articles']}") + click.echo("") + click.echo("TIMING") + click.echo("-" * 60) + if self.stats['tier1_time'] > 0: + click.echo(f"Tier 1 Time: {self.stats['tier1_time']:.1f}s ({self.stats['tier1_time']/60:.1f}m)") + if self.stats['tier2_time'] > 0: + click.echo(f"Tier 2 Time: {self.stats['tier2_time']:.1f}s ({self.stats['tier2_time']/60:.1f}m)") + if self.stats['tier3_time'] > 0: + click.echo(f"Tier 3 Time: {self.stats['tier3_time']:.1f}s ({self.stats['tier3_time']/60:.1f}m)") + click.echo(f"Total Time: {self.stats['total_time']:.1f}s ({self.stats['total_time']/60:.1f}m)") click.echo("="*60) \ No newline at end of file diff --git a/src/generation/job_config.py b/src/generation/job_config.py index 11a4e0a..c082b30 100644 --- a/src/generation/job_config.py +++ b/src/generation/job_config.py @@ -78,6 +78,7 @@ class TierConfig: min_h3_tags: int max_h3_tags: int anchor_text_config: Optional[AnchorTextConfig] = None + models: Optional[ModelConfig] = None @dataclass @@ -329,6 +330,20 @@ class JobConfig: raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array") anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text) + # Parse tier-level models if present + tier_models = None + if "models" in tier_data: + models_data = tier_data["models"] + if not isinstance(models_data, dict): + raise ValueError(f"'{tier_name}.models' must be an object") + if "title" not in models_data or "outline" not in models_data or "content" not in models_data: + raise 
ValueError(f"'{tier_name}.models' must have 'title', 'outline', and 'content' fields") + tier_models = ModelConfig( + title=models_data["title"], + outline=models_data["outline"], + content=models_data["content"] + ) + return TierConfig( count=tier_data.get("count", 1), min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]), @@ -337,7 +352,8 @@ class JobConfig: max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]), min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]), max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]), - anchor_text_config=anchor_text_config + anchor_text_config=anchor_text_config, + models=tier_models ) def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig: diff --git a/src/generation/prompts/content_generation.json b/src/generation/prompts/content_generation.json index ca96093..6a1cdf9 100644 --- a/src/generation/prompts/content_generation.json +++ b/src/generation/prompts/content_generation.json @@ -1,5 +1,5 @@ { "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.", - "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget token count range: {min_word_count} to {max_word_count} tokens.\n\nReturn as an HTML fragment with
 &lt;h1&gt;, &lt;h2&gt;, and &lt;h3&gt; tags. Do NOT include &lt;html&gt;, &lt;head&gt;, &lt;body&gt;, or &lt;!DOCTYPE&gt; tags. Start directly with the first &lt;h1&gt;
heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content." + "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with
 &lt;h1&gt;, &lt;h2&gt;, and &lt;h3&gt; tags. Do NOT include &lt;html&gt;, &lt;head&gt;, &lt;body&gt;, or &lt;!DOCTYPE&gt; tags. Start directly with the first &lt;h1&gt;
heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content." } diff --git a/src/generation/service.py b/src/generation/service.py index 86c914c..c5dbb01 100644 --- a/src/generation/service.py +++ b/src/generation/service.py @@ -293,8 +293,11 @@ class ContentGenerator: related_str = ", ".join(project.related_searches or []) outline_str = json.dumps(outline, indent=2) - compensated_min = min_word_count + 200 - compensated_max = max_word_count + 200 + compensated_min = min_word_count + 100 + compensated_max = max_word_count + 100 + + h3_count = sum(len(section.get("h3", [])) for section in outline.get("outline", [])) + words_per_section = int(compensated_min / h3_count) if h3_count > 0 else 100 system_msg, user_prompt = self.prompt_manager.format_prompt( "content_generation", @@ -304,7 +307,8 @@ class ContentGenerator: entities=entities_str, related_searches=related_str, min_word_count=compensated_min, - max_word_count=compensated_max + max_word_count=compensated_max, + words_per_section=words_per_section ) content = self.ai_client.generate_completion(