Fixed concurrency and model changes at job level - version 1.1.0
parent
d919ea25e1
commit
5eef4fe507
|
|
@ -0,0 +1,66 @@
|
||||||
|
"""
|
||||||
|
Delete all generated content for a specific project
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
import click
|
||||||
|
from src.database.session import db_manager
|
||||||
|
from src.database.models import GeneratedContent
|
||||||
|
|
||||||
|
def delete_project_content(project_id: int, confirm: bool = True) -> None:
    """
    Delete all generated content records for a project.

    Args:
        project_id: Project ID to delete content for
        confirm: If True, ask for confirmation before deleting

    Raises:
        Exception: re-raises any database error after rolling back, so the
            CLI exits with a non-zero status.
    """
    db_manager.initialize()
    session = db_manager.get_session()

    try:
        records = session.query(GeneratedContent).filter(
            GeneratedContent.project_id == project_id
        ).all()

        count = len(records)

        if count == 0:
            click.echo(f"No generated content found for project {project_id}")
            return

        click.echo(f"Found {count} generated content records for project {project_id}:")
        for record in records:
            # FIX: record.title may be NULL in the DB; slicing None raised
            # TypeError and aborted the listing. Coalesce to "" first.
            title_preview = (record.title or "")[:60]
            click.echo(f"  - ID {record.id}: {record.tier} - {title_preview}...")

        if confirm:
            # Default to "no" so an accidental Enter does not delete anything.
            if not click.confirm(f"\nDelete all {count} records?", default=False):
                click.echo("Cancelled.")
                return

        for record in records:
            session.delete(record)

        session.commit()
        click.echo(f"Successfully deleted {count} records for project {project_id}")

    except Exception as e:
        # Roll back the partial transaction before surfacing the error;
        # re-raise so callers see the failure.
        session.rollback()
        click.echo(f"Error: {e}", err=True)
        raise
    finally:
        session.close()
|
||||||
|
|
||||||
|
@click.command()
@click.argument('project_id', type=int)
@click.option('--yes', '-y', is_flag=True, help='Skip confirmation prompt')
def main(project_id: int, yes: bool):
    """Delete all generated content for PROJECT_ID"""
    # --yes/-y skips the interactive prompt; otherwise ask before deleting.
    should_confirm = not yes
    delete_project_content(project_id, confirm=should_confirm)


if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
|
@ -6,7 +6,7 @@ import time
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Dict, Any
|
from typing import Optional, Dict, Any
|
||||||
from openai import OpenAI, RateLimitError, APIError
|
from openai import OpenAI, RateLimitError, APIError, APIConnectionError, APITimeoutError
|
||||||
from src.core.config import get_config
|
from src.core.config import get_config
|
||||||
|
|
||||||
AVAILABLE_MODELS = {
|
AVAILABLE_MODELS = {
|
||||||
|
|
@ -24,7 +24,14 @@ class AIClient:
|
||||||
model: str,
|
model: str,
|
||||||
base_url: str = "https://openrouter.ai/api/v1"
|
base_url: str = "https://openrouter.ai/api/v1"
|
||||||
):
|
):
|
||||||
self.client = OpenAI(api_key=api_key, base_url=base_url)
|
self.client = OpenAI(
|
||||||
|
api_key=api_key,
|
||||||
|
base_url=base_url,
|
||||||
|
default_headers={
|
||||||
|
"HTTP-Referer": "https://github.com/yourusername/Big-Link-Man",
|
||||||
|
"X-Title": "Big-Link-Man"
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
if model in AVAILABLE_MODELS:
|
if model in AVAILABLE_MODELS:
|
||||||
self.model = AVAILABLE_MODELS[model]
|
self.model = AVAILABLE_MODELS[model]
|
||||||
|
|
@ -72,32 +79,54 @@ class AIClient:
|
||||||
kwargs["response_format"] = {"type": "json_object"}
|
kwargs["response_format"] = {"type": "json_object"}
|
||||||
|
|
||||||
retries = 3
|
retries = 3
|
||||||
|
wait_times = [10, 20]
|
||||||
|
|
||||||
for attempt in range(retries):
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
response = self.client.chat.completions.create(**kwargs)
|
response = self.client.chat.completions.create(**kwargs)
|
||||||
content = response.choices[0].message.content or ""
|
content = response.choices[0].message.content or ""
|
||||||
# Debug: print first 200 chars if json_mode
|
|
||||||
if json_mode:
|
if json_mode:
|
||||||
print(f"[DEBUG] AI Response (first 200 chars): {content[:200]}")
|
print(f"[DEBUG] AI Response (first 200 chars): {content[:200]}")
|
||||||
return content
|
return content
|
||||||
|
|
||||||
except RateLimitError as e:
|
except RateLimitError as e:
|
||||||
if attempt < retries - 1:
|
if attempt < retries - 1:
|
||||||
wait_time = 2 ** attempt
|
wait_time = wait_times[attempt]
|
||||||
print(f"Rate limit hit. Retrying in {wait_time}s...")
|
print(f"[API] Rate limit hit. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
else:
|
else:
|
||||||
|
print(f"[API] Rate limit exceeded after {retries} attempts")
|
||||||
|
raise
|
||||||
|
|
||||||
|
except (APIConnectionError, APITimeoutError) as e:
|
||||||
|
if attempt < retries - 1:
|
||||||
|
wait_time = wait_times[attempt]
|
||||||
|
print(f"[API] Connection/timeout error. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||||
|
time.sleep(wait_time)
|
||||||
|
else:
|
||||||
|
print(f"[API] Connection failed after {retries} attempts")
|
||||||
|
raise
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
if attempt < retries - 1:
|
||||||
|
wait_time = wait_times[attempt]
|
||||||
|
print(f"[API] Invalid JSON response (likely API error page). Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||||
|
time.sleep(wait_time)
|
||||||
|
else:
|
||||||
|
print(f"[API] Failed to get valid response after {retries} attempts")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
except APIError as e:
|
except APIError as e:
|
||||||
if attempt < retries - 1 and "network" in str(e).lower():
|
if attempt < retries - 1:
|
||||||
wait_time = 2 ** attempt
|
wait_time = wait_times[attempt]
|
||||||
print(f"Network error. Retrying in {wait_time}s...")
|
print(f"[API] API error: {str(e)[:100]}. Retrying in {wait_time}s... (attempt {attempt + 1}/{retries})")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
else:
|
else:
|
||||||
|
print(f"[API] API error after {retries} attempts: {str(e)[:200]}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
print(f"[API] Unexpected error: {type(e).__name__}: {str(e)[:200]}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
return ""
|
return ""
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ Batch processor for content generation jobs
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List
|
||||||
import click
|
import click
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
@ -45,7 +46,11 @@ class BatchProcessor:
|
||||||
"total_articles": 0,
|
"total_articles": 0,
|
||||||
"generated_articles": 0,
|
"generated_articles": 0,
|
||||||
"augmented_articles": 0,
|
"augmented_articles": 0,
|
||||||
"failed_articles": 0
|
"failed_articles": 0,
|
||||||
|
"tier1_time": 0.0,
|
||||||
|
"tier2_time": 0.0,
|
||||||
|
"tier3_time": 0.0,
|
||||||
|
"total_time": 0.0
|
||||||
}
|
}
|
||||||
|
|
||||||
def process_job(
|
def process_job(
|
||||||
|
|
@ -64,6 +69,8 @@ class BatchProcessor:
|
||||||
continue_on_error: If True, continue on article generation failure
|
continue_on_error: If True, continue on article generation failure
|
||||||
auto_deploy: If True, deploy to cloud storage after generation (default: True)
|
auto_deploy: If True, deploy to cloud storage after generation (default: True)
|
||||||
"""
|
"""
|
||||||
|
start_time = time.time()
|
||||||
|
|
||||||
job_config = JobConfig(job_file_path)
|
job_config = JobConfig(job_file_path)
|
||||||
jobs = job_config.get_jobs()
|
jobs = job_config.get_jobs()
|
||||||
|
|
||||||
|
|
@ -78,6 +85,7 @@ class BatchProcessor:
|
||||||
if not continue_on_error:
|
if not continue_on_error:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
self.stats["total_time"] = time.time() - start_time
|
||||||
self._print_summary()
|
self._print_summary()
|
||||||
|
|
||||||
def _generate_all_titles_for_tier(
|
def _generate_all_titles_for_tier(
|
||||||
|
|
@ -175,6 +183,8 @@ class BatchProcessor:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
for tier_name, tier_config in job.tiers.items():
|
for tier_name, tier_config in job.tiers.items():
|
||||||
|
tier_start_time = time.time()
|
||||||
|
|
||||||
self._process_tier(
|
self._process_tier(
|
||||||
job.project_id,
|
job.project_id,
|
||||||
tier_name,
|
tier_name,
|
||||||
|
|
@ -184,6 +194,10 @@ class BatchProcessor:
|
||||||
debug,
|
debug,
|
||||||
continue_on_error
|
continue_on_error
|
||||||
)
|
)
|
||||||
|
|
||||||
|
tier_elapsed = time.time() - tier_start_time
|
||||||
|
with self.stats_lock:
|
||||||
|
self.stats[f"{tier_name}_time"] += tier_elapsed
|
||||||
|
|
||||||
if auto_deploy:
|
if auto_deploy:
|
||||||
try:
|
try:
|
||||||
|
|
@ -210,7 +224,7 @@ class BatchProcessor:
|
||||||
project = self.project_repo.get_by_id(project_id)
|
project = self.project_repo.get_by_id(project_id)
|
||||||
keyword = project.main_keyword
|
keyword = project.main_keyword
|
||||||
|
|
||||||
models = job.models if job.models else None
|
models = tier_config.models if tier_config.models else (job.models if job.models else None)
|
||||||
|
|
||||||
click.echo(f"\n[{tier_name}] Generating {tier_config.count} titles in batches...")
|
click.echo(f"\n[{tier_name}] Generating {tier_config.count} titles in batches...")
|
||||||
titles_file = self._generate_all_titles_for_tier(
|
titles_file = self._generate_all_titles_for_tier(
|
||||||
|
|
@ -718,4 +732,14 @@ class BatchProcessor:
|
||||||
click.echo(f"Articles generated: {self.stats['generated_articles']}/{self.stats['total_articles']}")
|
click.echo(f"Articles generated: {self.stats['generated_articles']}/{self.stats['total_articles']}")
|
||||||
click.echo(f"Augmented: {self.stats['augmented_articles']}")
|
click.echo(f"Augmented: {self.stats['augmented_articles']}")
|
||||||
click.echo(f"Failed: {self.stats['failed_articles']}")
|
click.echo(f"Failed: {self.stats['failed_articles']}")
|
||||||
|
click.echo("")
|
||||||
|
click.echo("TIMING")
|
||||||
|
click.echo("-" * 60)
|
||||||
|
if self.stats['tier1_time'] > 0:
|
||||||
|
click.echo(f"Tier 1 Time: {self.stats['tier1_time']:.1f}s ({self.stats['tier1_time']/60:.1f}m)")
|
||||||
|
if self.stats['tier2_time'] > 0:
|
||||||
|
click.echo(f"Tier 2 Time: {self.stats['tier2_time']:.1f}s ({self.stats['tier2_time']/60:.1f}m)")
|
||||||
|
if self.stats['tier3_time'] > 0:
|
||||||
|
click.echo(f"Tier 3 Time: {self.stats['tier3_time']:.1f}s ({self.stats['tier3_time']/60:.1f}m)")
|
||||||
|
click.echo(f"Total Time: {self.stats['total_time']:.1f}s ({self.stats['total_time']/60:.1f}m)")
|
||||||
click.echo("="*60)
|
click.echo("="*60)
|
||||||
|
|
@ -78,6 +78,7 @@ class TierConfig:
|
||||||
min_h3_tags: int
|
min_h3_tags: int
|
||||||
max_h3_tags: int
|
max_h3_tags: int
|
||||||
anchor_text_config: Optional[AnchorTextConfig] = None
|
anchor_text_config: Optional[AnchorTextConfig] = None
|
||||||
|
models: Optional[ModelConfig] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -329,6 +330,20 @@ class JobConfig:
|
||||||
raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array")
|
raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array")
|
||||||
anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text)
|
anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text)
|
||||||
|
|
||||||
|
# Parse tier-level models if present
|
||||||
|
tier_models = None
|
||||||
|
if "models" in tier_data:
|
||||||
|
models_data = tier_data["models"]
|
||||||
|
if not isinstance(models_data, dict):
|
||||||
|
raise ValueError(f"'{tier_name}.models' must be an object")
|
||||||
|
if "title" not in models_data or "outline" not in models_data or "content" not in models_data:
|
||||||
|
raise ValueError(f"'{tier_name}.models' must have 'title', 'outline', and 'content' fields")
|
||||||
|
tier_models = ModelConfig(
|
||||||
|
title=models_data["title"],
|
||||||
|
outline=models_data["outline"],
|
||||||
|
content=models_data["content"]
|
||||||
|
)
|
||||||
|
|
||||||
return TierConfig(
|
return TierConfig(
|
||||||
count=tier_data.get("count", 1),
|
count=tier_data.get("count", 1),
|
||||||
min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]),
|
min_word_count=tier_data.get("min_word_count", defaults["min_word_count"]),
|
||||||
|
|
@ -337,7 +352,8 @@ class JobConfig:
|
||||||
max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]),
|
max_h2_tags=tier_data.get("max_h2_tags", defaults["max_h2_tags"]),
|
||||||
min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]),
|
min_h3_tags=tier_data.get("min_h3_tags", defaults["min_h3_tags"]),
|
||||||
max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]),
|
max_h3_tags=tier_data.get("max_h3_tags", defaults["max_h3_tags"]),
|
||||||
anchor_text_config=anchor_text_config
|
anchor_text_config=anchor_text_config,
|
||||||
|
models=tier_models
|
||||||
)
|
)
|
||||||
|
|
||||||
def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig:
|
def _parse_tier_from_array(self, tier_name: str, tier_data: dict) -> TierConfig:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
{
|
{
|
||||||
"system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.",
|
"system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.",
|
||||||
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget token count range: {min_word_count} to {max_word_count} tokens.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content."
|
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -293,8 +293,11 @@ class ContentGenerator:
|
||||||
related_str = ", ".join(project.related_searches or [])
|
related_str = ", ".join(project.related_searches or [])
|
||||||
outline_str = json.dumps(outline, indent=2)
|
outline_str = json.dumps(outline, indent=2)
|
||||||
|
|
||||||
compensated_min = min_word_count + 200
|
compensated_min = min_word_count + 100
|
||||||
compensated_max = max_word_count + 200
|
compensated_max = max_word_count + 100
|
||||||
|
|
||||||
|
h3_count = sum(len(section.get("h3", [])) for section in outline.get("outline", []))
|
||||||
|
words_per_section = int(compensated_min / h3_count) if h3_count > 0 else 100
|
||||||
|
|
||||||
system_msg, user_prompt = self.prompt_manager.format_prompt(
|
system_msg, user_prompt = self.prompt_manager.format_prompt(
|
||||||
"content_generation",
|
"content_generation",
|
||||||
|
|
@ -304,7 +307,8 @@ class ContentGenerator:
|
||||||
entities=entities_str,
|
entities=entities_str,
|
||||||
related_searches=related_str,
|
related_searches=related_str,
|
||||||
min_word_count=compensated_min,
|
min_word_count=compensated_min,
|
||||||
max_word_count=compensated_max
|
max_word_count=compensated_max,
|
||||||
|
words_per_section=words_per_section
|
||||||
)
|
)
|
||||||
|
|
||||||
content = self.ai_client.generate_completion(
|
content = self.ai_client.generate_completion(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue