Fixed concurrency and model changes at job level - version 1.1.0

main
PeninsulaInd 2025-10-27 14:37:47 -05:00
parent d919ea25e1
commit 5eef4fe507
6 changed files with 154 additions and 15 deletions

View File

@ -0,0 +1,66 @@
"""
Delete all generated content for a specific project
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import click
from src.database.session import db_manager
from src.database.models import GeneratedContent
def delete_project_content(project_id: int, confirm: bool = True) -> None:
    """
    Delete all generated content records for a project.

    Args:
        project_id: Project ID to delete content for.
        confirm: If True, ask for confirmation before deleting.

    Raises:
        Exception: Any database error is re-raised after rolling back
            the session so the CLI exits non-zero.
    """
    db_manager.initialize()
    session = db_manager.get_session()
    try:
        records = session.query(GeneratedContent).filter(
            GeneratedContent.project_id == project_id
        ).all()
        count = len(records)
        if count == 0:
            click.echo(f"No generated content found for project {project_id}")
            return
        click.echo(f"Found {count} generated content records for project {project_id}:")
        for record in records:
            # Guard against NULL titles: slicing None would raise TypeError.
            title_preview = (record.title or "")[:60]
            click.echo(f"  - ID {record.id}: {record.tier} - {title_preview}...")
        if confirm:
            # default=False so a bare Enter aborts rather than deletes.
            if not click.confirm(f"\nDelete all {count} records?", default=False):
                click.echo("Cancelled.")
                return
        # Delete via the session (not a bulk query.delete()) so ORM-level
        # cascades on GeneratedContent, if any, still fire.
        for record in records:
            session.delete(record)
        session.commit()
        click.echo(f"Successfully deleted {count} records for project {project_id}")
    except Exception as e:
        # Roll back the partial transaction and surface the error to the caller.
        session.rollback()
        click.echo(f"Error: {e}", err=True)
        raise
    finally:
        session.close()
@click.command()
@click.argument('project_id', type=int)
@click.option('--yes', '-y', is_flag=True, help='Skip confirmation prompt')
def main(project_id: int, yes: bool):
    """Delete all generated content for PROJECT_ID"""
    # --yes inverts into confirm: skip the interactive prompt when set.
    should_confirm = not yes
    delete_project_content(project_id, confirm=should_confirm)


if __name__ == "__main__":
    main()

View File

@ -6,7 +6,7 @@ import time
import json
from pathlib import Path
from typing import Optional, Dict, Any
from openai import OpenAI, RateLimitError, APIError
from openai import OpenAI, RateLimitError, APIError, APIConnectionError, APITimeoutError
from src.core.config import get_config
AVAILABLE_MODELS = {
@ -24,7 +24,14 @@ class AIClient:
model: str,
base_url: str = "https://openrouter.ai/api/v1"
):
self.client = OpenAI(api_key=api_key, base_url=base_url)
self.client = OpenAI(
api_key=api_key,
base_url=base_url,
default_headers={
"HTTP-Referer": "https://github.com/yourusername/Big-Link-Man",
"X-Title": "Big-Link-Man"
}
)
if model in AVAILABLE_MODELS:
self.model = AVAILABLE_MODELS[model]
@ -72,32 +79,54 @@ class AIClient:
kwargs["response_format"] = {"type": "json_object"}
retries = 3
wait_times = [10, 20]
for attempt in range(retries):
try:
response = self.client.chat.completions.create(**kwargs)
content = response.choices[0].message.content or ""
# Debug: print first 200 chars if json_mode
if json_mode:
print(f"[DEBUG] AI Response (first 200 chars): {content[:200]}")
return content
except RateLimitError as e:
if attempt < retries - 1:
wait_time = 2 ** attempt
print(f"Rate limit hit. Retrying in {wait_time}s...")
wait_time = wait_times[attempt]
print(f"[API] Rate limit hit. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
time.sleep(wait_time)
else:
print(f"[API] Rate limit exceeded after {retries} attempts")
raise
except (APIConnectionError, APITimeoutError) as e:
if attempt < retries - 1:
wait_time = wait_times[attempt]
print(f"[API] Connection/timeout error. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
time.sleep(wait_time)
else:
print(f"[API] Connection failed after {retries} attempts")
raise
except json.JSONDecodeError as e:
if attempt < retries - 1:
wait_time = wait_times[attempt]
print(f"[API] Invalid JSON response (likely API error page). Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
time.sleep(wait_time)
else:
print(f"[API] Failed to get valid response after {retries} attempts")
raise
except APIError as e:
if attempt < retries - 1 and "network" in str(e).lower():
wait_time = 2 ** attempt
print(f"Network error. Retrying in {wait_time}s...")
if attempt < retries - 1:
wait_time = wait_times[attempt]
print(f"[API] API error: {str(e)[:100]}. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
time.sleep(wait_time)
else:
print(f"[API] API error after {retries} attempts: {str(e)[:200]}")
raise
except Exception as e:
print(f"[API] Unexpected error: {type(e).__name__}: {str(e)[:200]}")
raise
return ""

View File

@ -5,6 +5,7 @@ Batch processor for content generation jobs
from typing import Dict, Any, Optional, List
import click
import os
import time
from pathlib import Path
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
@ -45,7 +46,11 @@ class BatchProcessor:
"total_articles": 0,
"generated_articles": 0,
"augmented_articles": 0,
"failed_articles": 0
"failed_articles": 0,
"tier1_time": 0.0,
"tier2_time": 0.0,
"tier3_time": 0.0,
"total_time": 0.0
}
def process_job(
@ -64,6 +69,8 @@ class BatchProcessor:
continue_on_error: If True, continue on article generation failure
auto_deploy: If True, deploy to cloud storage after generation (default: True)
"""
start_time = time.time()
job_config = JobConfig(job_file_path)
jobs = job_config.get_jobs()
@ -78,6 +85,7 @@ class BatchProcessor:
if not continue_on_error:
raise
self.stats["total_time"] = time.time() - start_time
self._print_summary()
def _generate_all_titles_for_tier(
@ -175,6 +183,8 @@ class BatchProcessor:
raise
for tier_name, tier_config in job.tiers.items():
tier_start_time = time.time()
self._process_tier(
job.project_id,
tier_name,
@ -184,6 +194,10 @@ class BatchProcessor:
debug,
continue_on_error
)
tier_elapsed = time.time() - tier_start_time
with self.stats_lock:
self.stats[f"{tier_name}_time"] += tier_elapsed
if auto_deploy:
try:
@ -210,7 +224,7 @@ class BatchProcessor:
project = self.project_repo.get_by_id(project_id)
keyword = project.main_keyword
models = job.models if job.models else None
models = tier_config.models if tier_config.models else (job.models if job.models else None)
click.echo(f"\n[{tier_name}] Generating {tier_config.count} titles in batches...")
titles_file = self._generate_all_titles_for_tier(
@ -718,4 +732,14 @@ class BatchProcessor:
click.echo(f"Articles generated: {self.stats['generated_articles']}/{self.stats['total_articles']}")
click.echo(f"Augmented: {self.stats['augmented_articles']}")
click.echo(f"Failed: {self.stats['failed_articles']}")
click.echo("")
click.echo("TIMING")
click.echo("-" * 60)
if self.stats['tier1_time'] > 0:
click.echo(f"Tier 1 Time: {self.stats['tier1_time']:.1f}s ({self.stats['tier1_time']/60:.1f}m)")
if self.stats['tier2_time'] > 0:
click.echo(f"Tier 2 Time: {self.stats['tier2_time']:.1f}s ({self.stats['tier2_time']/60:.1f}m)")
if self.stats['tier3_time'] > 0:
click.echo(f"Tier 3 Time: {self.stats['tier3_time']:.1f}s ({self.stats['tier3_time']/60:.1f}m)")
click.echo(f"Total Time: {self.stats['total_time']:.1f}s ({self.stats['total_time']/60:.1f}m)")
click.echo("="*60)

View File

@ -78,6 +78,7 @@ class TierConfig:
min_h3_tags: int
max_h3_tags: int
anchor_text_config: Optional[AnchorTextConfig] = None
models: Optional[ModelConfig] = None
@dataclass
@ -329,6 +330,20 @@ class JobConfig:
raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array")
anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text)
# Parse tier-level models if present
tier_models = None
if "models" in tier_data:
models_data = tier_data["models"]
if not isinstance(models_data, dict):
raise ValueError(f"'{tier_name}.models' must be an object")
if "title" not in models_data or "outline" not in models_data or "content" not in models_data:
raise ValueError(f"'{tier_name}.models' must have 'title', 'outline', and 'content' fields")
tier_models = ModelConfig(
title=models_data["title"],
outline=models_data["outline"],
content=models_data["content"]
)
return TierConfig(
count=tier_data.get("count", 1),
min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]),
@ -337,7 +352,8 @@ class JobConfig:
max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]),
min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]),
max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]),
anchor_text_config=anchor_text_config
anchor_text_config=anchor_text_config,
models=tier_models
)
def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig:

View File

@ -1,5 +1,5 @@
{
"system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.",
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget token count range: {min_word_count} to {max_word_count} tokens.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content."
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content."
}

View File

@ -293,8 +293,11 @@ class ContentGenerator:
related_str = ", ".join(project.related_searches or [])
outline_str = json.dumps(outline, indent=2)
compensated_min = min_word_count + 200
compensated_max = max_word_count + 200
compensated_min = min_word_count + 100
compensated_max = max_word_count + 100
h3_count = sum(len(section.get("h3", [])) for section in outline.get("outline", []))
words_per_section = int(compensated_min / h3_count) if h3_count > 0 else 100
system_msg, user_prompt = self.prompt_manager.format_prompt(
"content_generation",
@ -304,7 +307,8 @@ class ContentGenerator:
entities=entities_str,
related_searches=related_str,
min_word_count=compensated_min,
max_word_count=compensated_max
max_word_count=compensated_max,
words_per_section=words_per_section
)
content = self.ai_client.generate_completion(