151 lines
5.2 KiB
Python
151 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Helper script to create job configuration files programmatically
|
|
"""
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent / "src"))
|
|
|
|
from generation.job_config import JobConfig, TierConfig, ModelConfig, AnchorTextConfig, FailureConfig, InterlinkingConfig
|
|
|
|
def create_tier1_job(project_id: int, article_count: int = 15):
    """
    Build a job configuration containing a single Tier 1 tier.

    Args:
        project_id: Project ID stored on the job and used in its name.
        article_count: Number of articles requested for the tier
            (defaults to 15).

    Returns:
        A JobConfig with one TierConfig (tier=1), a FailureConfig and an
        InterlinkingConfig.
    """
    # Sub-configs are built first, in the same order the nested call would
    # evaluate them, then assembled into the job.
    tier_models = ModelConfig(
        title="openai/gpt-4o-mini",
        outline="anthropic/claude-3.5-sonnet",
        content="anthropic/claude-3.5-sonnet",
    )
    anchor_settings = AnchorTextConfig(mode="default")
    first_tier = TierConfig(
        tier=1,
        article_count=article_count,
        models=tier_models,
        anchor_text_config=anchor_settings,
        validation_attempts=3,
    )

    failure_settings = FailureConfig(
        max_consecutive_failures=5,
        skip_on_failure=True,
    )
    link_settings = InterlinkingConfig(
        links_per_article_min=2,
        links_per_article_max=4,
        include_home_link=True,
    )

    return JobConfig(
        job_name=f"Tier 1 Launch - Project {project_id}",
        project_id=project_id,
        description=f"Generate {article_count} high-quality Tier 1 articles",
        tiers=[first_tier],
        failure_config=failure_settings,
        interlinking=link_settings,
    )
|
|
|
|
def create_multi_tier_job(project_id: int, tier_counts: list):
    """
    Build a job configuration with one tier per entry in ``tier_counts``.

    Tiers are numbered from 1 in list order. Tier 1 uses the higher-quality
    outline/content model with 3 validation attempts; every later tier uses
    the faster/cheaper model throughout with 2 validation attempts.

    Args:
        project_id: Project ID stored on the job and used in its name.
        tier_counts: List of article counts for each tier, e.g. [15, 50, 100]

    Returns:
        A JobConfig holding the generated tiers, a FailureConfig and an
        InterlinkingConfig.
    """
    quality_model = "anthropic/claude-3.5-sonnet"
    economy_model = "openai/gpt-4o-mini"

    tiers = []
    for position, count in enumerate(tier_counts, start=1):
        is_first_tier = position == 1
        # Titles always use the economy model; only tier 1 upgrades the
        # outline and content models.
        body_model = quality_model if is_first_tier else economy_model
        tiers.append(
            TierConfig(
                tier=position,
                article_count=count,
                models=ModelConfig(
                    title=economy_model,
                    outline=body_model,
                    content=body_model,
                ),
                validation_attempts=3 if is_first_tier else 2,
            )
        )

    article_total = sum(tier_counts)
    tier_total = len(tier_counts)

    return JobConfig(
        job_name=f"Multi-Tier Build - Project {project_id}",
        project_id=project_id,
        description=f"Site build with {article_total} articles across {tier_total} tiers",
        tiers=tiers,
        failure_config=FailureConfig(
            max_consecutive_failures=10,
            skip_on_failure=True,
        ),
        interlinking=InterlinkingConfig(
            links_per_article_min=2,
            links_per_article_max=4,
            include_home_link=True,
        ),
    )
|
|
|
|
def _parse_int_arg(raw: str, label: str) -> int:
    """
    Convert a command-line argument to an int.

    Prints an error naming the offending argument and exits with status 1
    when ``raw`` is not a valid integer, instead of surfacing a traceback.
    """
    try:
        return int(raw)
    except ValueError:
        print(f"Error: {label} must be an integer, got {raw!r}")
        sys.exit(1)


def _print_usage() -> None:
    """Print the command-line usage summary and examples."""
    print("Usage:")
    print("  python create_job_config.py <project_id> tier1 <article_count>")
    print("  python create_job_config.py <project_id> multi <tier1_count> <tier2_count> [tier3_count] ...")
    print("\nExamples:")
    print("  python create_job_config.py 1 tier1 15")
    print("  python create_job_config.py 1 multi 15 50 100")
    print("  python create_job_config.py 1 multi 10 25")


def main(argv: list) -> None:
    """
    Command-line entry point: build a job config and write it to a JSON file.

    Args:
        argv: Full argument vector (``sys.argv`` style): program name,
            project id, job type (``tier1`` or ``multi``), then one article
            count for ``tier1`` or one count per tier for ``multi``.

    Exits with status 1 (after printing a message) on missing arguments,
    non-integer numeric arguments, or an unknown job type.
    """
    if len(argv) < 3:
        _print_usage()
        sys.exit(1)

    project_id = _parse_int_arg(argv[1], "project_id")
    job_type = argv[2]

    if job_type == "tier1":
        if len(argv) < 4:
            print("Error: tier1 requires article_count")
            print("Example: python create_job_config.py 1 tier1 15")
            sys.exit(1)
        article_count = _parse_int_arg(argv[3], "article_count")
        job = create_tier1_job(project_id, article_count)
        filename = f"jobs/project_{project_id}_tier1_{article_count}articles.json"
    elif job_type == "multi":
        if len(argv) < 4:
            print("Error: multi requires at least one tier count")
            print("Example: python create_job_config.py 1 multi 15 50 100")
            sys.exit(1)
        # Every count is validated before any job object is built.
        tier_counts = [
            _parse_int_arg(raw, f"tier{index}_count")
            for index, raw in enumerate(argv[3:], start=1)
        ]
        job = create_multi_tier_job(project_id, tier_counts)
        total = sum(tier_counts)
        filename = f"jobs/project_{project_id}_multi_{len(tier_counts)}tiers_{total}articles.json"
    else:
        print(f"Unknown job type: {job_type}")
        print("Use 'tier1' or 'multi'")
        sys.exit(1)

    # Make sure the output directory exists before writing; this is a no-op
    # when it is already there (or if to_file creates it itself).
    Path(filename).parent.mkdir(parents=True, exist_ok=True)
    job.to_file(filename)

    print(f"Created job configuration: {filename}")
    print("\nJob details:")
    print(f"  Project ID: {project_id}")
    print(f"  Total articles: {job.get_total_articles()}")
    if job_type == "multi":
        for tier in job.tiers:
            print(f"  Tier {tier.tier}: {tier.article_count} articles")
    print("\nTo run this job:")
    print(f"  uv run python main.py generate-batch -j {filename} -u admin -p yourpassword")


if __name__ == "__main__":
    main(sys.argv)
|
|
|