Fixed concurrency and model changes at job level - version 1.1.0
parent
d919ea25e1
commit
5eef4fe507
|
|
@ -0,0 +1,66 @@
|
|||
"""
|
||||
Delete all generated content for a specific project
|
||||
"""
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
import click
|
||||
from src.database.session import db_manager
|
||||
from src.database.models import GeneratedContent
|
||||
|
||||
def delete_project_content(project_id: int, confirm: bool = True):
    """
    Delete all generated content for a project.

    Args:
        project_id: Project ID to delete content for
        confirm: If True, ask for confirmation before deleting

    Raises:
        Exception: Re-raises any database error after rolling back,
            so the CLI exits with a non-zero status.
    """
    db_manager.initialize()
    session = db_manager.get_session()

    try:
        records = session.query(GeneratedContent).filter(
            GeneratedContent.project_id == project_id
        ).all()

        count = len(records)

        if count == 0:
            click.echo(f"No generated content found for project {project_id}")
            return

        click.echo(f"Found {count} generated content records for project {project_id}:")
        for record in records:
            # Guard against a NULL title, and only append an ellipsis when
            # the title was actually truncated (the original appended "..."
            # unconditionally and crashed on a NULL title).
            title = record.title or ""
            suffix = "..." if len(title) > 60 else ""
            click.echo(f"  - ID {record.id}: {record.tier} - {title[:60]}{suffix}")

        if confirm:
            # Default to "no" so an accidental Enter does not delete data.
            if not click.confirm(f"\nDelete all {count} records?", default=False):
                click.echo("Cancelled.")
                return

        # Delete through the ORM one object at a time (rather than a bulk
        # query delete) so any mapper-level cascade rules still apply,
        # then commit once for the whole batch.
        for record in records:
            session.delete(record)

        session.commit()
        click.echo(f"Successfully deleted {count} records for project {project_id}")

    except Exception as e:
        # Roll back the partial transaction, report, and re-raise.
        session.rollback()
        click.echo(f"Error: {e}", err=True)
        raise
    finally:
        session.close()
|
||||
|
||||
@click.command()
@click.argument('project_id', type=int)
@click.option('--yes', '-y', is_flag=True, help='Skip confirmation prompt')
def main(project_id: int, yes: bool):
    """Delete all generated content for PROJECT_ID"""
    # --yes inverts into confirm=False, suppressing the interactive prompt.
    delete_project_content(project_id, confirm=not yes)


if __name__ == "__main__":
    main()
|
||||
|
||||
|
|
@ -6,7 +6,7 @@ import time
|
|||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any
|
||||
from openai import OpenAI, RateLimitError, APIError
|
||||
from openai import OpenAI, RateLimitError, APIError, APIConnectionError, APITimeoutError
|
||||
from src.core.config import get_config
|
||||
|
||||
AVAILABLE_MODELS = {
|
||||
|
|
@ -24,7 +24,14 @@ class AIClient:
|
|||
model: str,
|
||||
base_url: str = "https://openrouter.ai/api/v1"
|
||||
):
|
||||
self.client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
self.client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
default_headers={
|
||||
"HTTP-Referer": "https://github.com/yourusername/Big-Link-Man",
|
||||
"X-Title": "Big-Link-Man"
|
||||
}
|
||||
)
|
||||
|
||||
if model in AVAILABLE_MODELS:
|
||||
self.model = AVAILABLE_MODELS[model]
|
||||
|
|
@ -72,32 +79,54 @@ class AIClient:
|
|||
kwargs["response_format"] = {"type": "json_object"}
|
||||
|
||||
retries = 3
|
||||
wait_times = [10, 20]
|
||||
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
response = self.client.chat.completions.create(**kwargs)
|
||||
content = response.choices[0].message.content or ""
|
||||
# Debug: print first 200 chars if json_mode
|
||||
if json_mode:
|
||||
print(f"[DEBUG] AI Response (first 200 chars): {content[:200]}")
|
||||
return content
|
||||
|
||||
except RateLimitError as e:
|
||||
if attempt < retries - 1:
|
||||
wait_time = 2 ** attempt
|
||||
print(f"Rate limit hit. Retrying in {wait_time}s...")
|
||||
wait_time = wait_times[attempt]
|
||||
print(f"[API] Rate limit hit. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
print(f"[API] Rate limit exceeded after {retries} attempts")
|
||||
raise
|
||||
|
||||
except (APIConnectionError, APITimeoutError) as e:
|
||||
if attempt < retries - 1:
|
||||
wait_time = wait_times[attempt]
|
||||
print(f"[API] Connection/timeout error. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
print(f"[API] Connection failed after {retries} attempts")
|
||||
raise
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
if attempt < retries - 1:
|
||||
wait_time = wait_times[attempt]
|
||||
print(f"[API] Invalid JSON response (likely API error page). Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
print(f"[API] Failed to get valid response after {retries} attempts")
|
||||
raise
|
||||
|
||||
except APIError as e:
|
||||
if attempt < retries - 1 and "network" in str(e).lower():
|
||||
wait_time = 2 ** attempt
|
||||
print(f"Network error. Retrying in {wait_time}s...")
|
||||
if attempt < retries - 1:
|
||||
wait_time = wait_times[attempt]
|
||||
print(f"[API] API error: {str(e)[:100]}. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||
time.sleep(wait_time)
|
||||
else:
|
||||
print(f"[API] API error after {retries} attempts: {str(e)[:200]}")
|
||||
raise
|
||||
|
||||
except Exception as e:
|
||||
print(f"[API] Unexpected error: {type(e).__name__}: {str(e)[:200]}")
|
||||
raise
|
||||
|
||||
return ""
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ Batch processor for content generation jobs
|
|||
from typing import Dict, Any, Optional, List
|
||||
import click
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
|
@ -45,7 +46,11 @@ class BatchProcessor:
|
|||
"total_articles": 0,
|
||||
"generated_articles": 0,
|
||||
"augmented_articles": 0,
|
||||
"failed_articles": 0
|
||||
"failed_articles": 0,
|
||||
"tier1_time": 0.0,
|
||||
"tier2_time": 0.0,
|
||||
"tier3_time": 0.0,
|
||||
"total_time": 0.0
|
||||
}
|
||||
|
||||
def process_job(
|
||||
|
|
@ -64,6 +69,8 @@ class BatchProcessor:
|
|||
continue_on_error: If True, continue on article generation failure
|
||||
auto_deploy: If True, deploy to cloud storage after generation (default: True)
|
||||
"""
|
||||
start_time = time.time()
|
||||
|
||||
job_config = JobConfig(job_file_path)
|
||||
jobs = job_config.get_jobs()
|
||||
|
||||
|
|
@ -78,6 +85,7 @@ class BatchProcessor:
|
|||
if not continue_on_error:
|
||||
raise
|
||||
|
||||
self.stats["total_time"] = time.time() - start_time
|
||||
self._print_summary()
|
||||
|
||||
def _generate_all_titles_for_tier(
|
||||
|
|
@ -175,6 +183,8 @@ class BatchProcessor:
|
|||
raise
|
||||
|
||||
for tier_name, tier_config in job.tiers.items():
|
||||
tier_start_time = time.time()
|
||||
|
||||
self._process_tier(
|
||||
job.project_id,
|
||||
tier_name,
|
||||
|
|
@ -184,6 +194,10 @@ class BatchProcessor:
|
|||
debug,
|
||||
continue_on_error
|
||||
)
|
||||
|
||||
tier_elapsed = time.time() - tier_start_time
|
||||
with self.stats_lock:
|
||||
self.stats[f"{tier_name}_time"] += tier_elapsed
|
||||
|
||||
if auto_deploy:
|
||||
try:
|
||||
|
|
@ -210,7 +224,7 @@ class BatchProcessor:
|
|||
project = self.project_repo.get_by_id(project_id)
|
||||
keyword = project.main_keyword
|
||||
|
||||
models = job.models if job.models else None
|
||||
models = tier_config.models if tier_config.models else (job.models if job.models else None)
|
||||
|
||||
click.echo(f"\n[{tier_name}] Generating {tier_config.count} titles in batches...")
|
||||
titles_file = self._generate_all_titles_for_tier(
|
||||
|
|
@ -718,4 +732,14 @@ class BatchProcessor:
|
|||
click.echo(f"Articles generated: {self.stats['generated_articles']}/{self.stats['total_articles']}")
|
||||
click.echo(f"Augmented: {self.stats['augmented_articles']}")
|
||||
click.echo(f"Failed: {self.stats['failed_articles']}")
|
||||
click.echo("")
|
||||
click.echo("TIMING")
|
||||
click.echo("-" * 60)
|
||||
if self.stats['tier1_time'] > 0:
|
||||
click.echo(f"Tier 1 Time: {self.stats['tier1_time']:.1f}s ({self.stats['tier1_time']/60:.1f}m)")
|
||||
if self.stats['tier2_time'] > 0:
|
||||
click.echo(f"Tier 2 Time: {self.stats['tier2_time']:.1f}s ({self.stats['tier2_time']/60:.1f}m)")
|
||||
if self.stats['tier3_time'] > 0:
|
||||
click.echo(f"Tier 3 Time: {self.stats['tier3_time']:.1f}s ({self.stats['tier3_time']/60:.1f}m)")
|
||||
click.echo(f"Total Time: {self.stats['total_time']:.1f}s ({self.stats['total_time']/60:.1f}m)")
|
||||
click.echo("="*60)
|
||||
|
|
@ -78,6 +78,7 @@ class TierConfig:
|
|||
min_h3_tags: int
|
||||
max_h3_tags: int
|
||||
anchor_text_config: Optional[AnchorTextConfig] = None
|
||||
models: Optional[ModelConfig] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
@ -329,6 +330,20 @@ class JobConfig:
|
|||
raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array")
|
||||
anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text)
|
||||
|
||||
# Parse tier-level models if present
|
||||
tier_models = None
|
||||
if "models" in tier_data:
|
||||
models_data = tier_data["models"]
|
||||
if not isinstance(models_data, dict):
|
||||
raise ValueError(f"'{tier_name}.models' must be an object")
|
||||
if "title" not in models_data or "outline" not in models_data or "content" not in models_data:
|
||||
raise ValueError(f"'{tier_name}.models' must have 'title', 'outline', and 'content' fields")
|
||||
tier_models = ModelConfig(
|
||||
title=models_data["title"],
|
||||
outline=models_data["outline"],
|
||||
content=models_data["content"]
|
||||
)
|
||||
|
||||
return TierConfig(
|
||||
count=tier_data.get("count", 1),
|
||||
min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]),
|
||||
|
|
@ -337,7 +352,8 @@ class JobConfig:
|
|||
max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]),
|
||||
min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]),
|
||||
max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]),
|
||||
anchor_text_config=anchor_text_config
|
||||
anchor_text_config=anchor_text_config,
|
||||
models=tier_models
|
||||
)
|
||||
|
||||
def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.",
|
||||
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget token count range: {min_word_count} to {max_word_count} tokens.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content."
|
||||
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content."
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -293,8 +293,11 @@ class ContentGenerator:
|
|||
related_str = ", ".join(project.related_searches or [])
|
||||
outline_str = json.dumps(outline, indent=2)
|
||||
|
||||
compensated_min = min_word_count + 200
|
||||
compensated_max = max_word_count + 200
|
||||
compensated_min = min_word_count + 100
|
||||
compensated_max = max_word_count + 100
|
||||
|
||||
h3_count = sum(len(section.get("h3", [])) for section in outline.get("outline", []))
|
||||
words_per_section = int(compensated_min / h3_count) if h3_count > 0 else 100
|
||||
|
||||
system_msg, user_prompt = self.prompt_manager.format_prompt(
|
||||
"content_generation",
|
||||
|
|
@ -304,7 +307,8 @@ class ContentGenerator:
|
|||
entities=entities_str,
|
||||
related_searches=related_str,
|
||||
min_word_count=compensated_min,
|
||||
max_word_count=compensated_max
|
||||
max_word_count=compensated_max,
|
||||
words_per_section=words_per_section
|
||||
)
|
||||
|
||||
content = self.ai_client.generate_completion(
|
||||
|
|
|
|||
Loading…
Reference in New Issue