Remove job files from git tracking and update commands/prompts

main
PeninsulaInd 2025-11-17 11:07:25 -06:00
parent 4710677614
commit d5c501f004
14 changed files with 115 additions and 404 deletions

1
.gitignore vendored
View File

@ -33,6 +33,7 @@ content_automation.db*
# Generated job files # Generated job files
jobs/*.json jobs/*.json
jobs/done/
# Generated content files # Generated content files
*_project*.html *_project*.html

View File

@ -1,123 +0,0 @@
{
"jobs": [
{
"project_id": 100,
"models": {
"title": "anthropic/claude-3.5-sonnet",
"outline": "anthropic/claude-3.5-sonnet",
"content": "openai/gpt-4o"
},
"deployment_targets": [
"www.autorepairpro.com",
"www.carmaintenanceguide.com",
"www.enginespecialist.net"
],
"tier1_preferred_sites": [
"www.premium-automotive.com",
"www.expert-mechanic.org",
"autorepair123.b-cdn.net",
"carmaintenance456.b-cdn.net"
],
"auto_create_sites": true,
"create_sites_for_keywords": [
{
"keyword": "engine repair",
"count": 4
},
{
"keyword": "transmission service",
"count": 3
},
{
"keyword": "brake system",
"count": 2
}
],
"tiered_link_count_range": {
"min": 3,
"max": 6
},
"tiers": {
"tier1": {
"count": 8,
"min_word_count": 2200,
"max_word_count": 2800,
"min_h2_tags": 4,
"max_h2_tags": 6,
"min_h3_tags": 6,
"max_h3_tags": 12
}
}
},
{
"project_id": 101,
"models": {
"title": "openai/gpt-4o-mini",
"outline": "openai/gpt-4o",
"content": "anthropic/claude-3.5-sonnet"
},
"deployment_targets": [
"www.digitalmarketinghub.com",
"www.seoexperts.org"
],
"tier1_preferred_sites": [
"www.premium-seo.com",
"www.marketingmastery.net",
"seoexpert789.b-cdn.net",
"digitalmarketing456.b-cdn.net"
],
"auto_create_sites": true,
"create_sites_for_keywords": [
{
"keyword": "SEO optimization",
"count": 5
},
{
"keyword": "content marketing",
"count": 4
},
{
"keyword": "social media strategy",
"count": 3
},
{
"keyword": "email marketing",
"count": 2
}
],
"tiered_link_count_range": {
"min": 2,
"max": 5
},
"tiers": {
"tier1": {
"count": 12,
"min_word_count": 2000,
"max_word_count": 2500,
"min_h2_tags": 3,
"max_h2_tags": 5,
"min_h3_tags": 5,
"max_h3_tags": 10
},
"tier2": {
"count": 25,
"min_word_count": 1500,
"max_word_count": 2000,
"min_h2_tags": 2,
"max_h2_tags": 4,
"min_h3_tags": 3,
"max_h3_tags": 8
},
"tier3": {
"count": 40,
"min_word_count": 1000,
"max_word_count": 1500,
"min_h2_tags": 2,
"max_h2_tags": 3,
"min_h3_tags": 2,
"max_h3_tags": 6
}
}
}
]
}

View File

@ -1,37 +0,0 @@
{
"job_name": "Custom Anchor Text Test",
"project_id": 1,
"description": "Small batch with custom anchor text overrides for testing",
"tiers": [
{
"tier": 1,
"article_count": 5,
"models": {
"title": "anthropic/claude-3.5-sonnet",
"outline": "anthropic/claude-3.5-sonnet",
"content": "anthropic/claude-3.5-sonnet"
},
"anchor_text_config": {
"mode": "override",
"custom_text": [
"click here for more info",
"learn more about this topic",
"discover the best practices",
"expert guide and resources",
"comprehensive tutorial"
]
},
"validation_attempts": 3
}
],
"failure_config": {
"max_consecutive_failures": 3,
"skip_on_failure": true
},
"interlinking": {
"links_per_article_min": 3,
"links_per_article_max": 3,
"include_home_link": true
}
}

View File

@ -1,24 +0,0 @@
{
"jobs": [
{
"project_id": 2,
"deployment_targets": [
"www.domain1.com",
"www.domain2.com",
"www.domain3.com"
],
"tiers": {
"tier1": {
"count": 10,
"min_word_count": 2000,
"max_word_count": 2500,
"min_h2_tags": 3,
"max_h2_tags": 5,
"min_h3_tags": 5,
"max_h3_tags": 10
}
}
}
]
}

View File

@ -1,38 +0,0 @@
{
"job_name": "Multi-Site T1 Launch - 10 Articles Across 3 Sites",
"project_id": 2,
"description": "Generate 10 Tier 1 articles distributed across 3 deployment sites",
"deployment_targets": [
"www.domain1.com",
"www.domain2.com",
"www.domain3.com"
],
"deployment_overflow": "round_robin",
"tiers": [
{
"tier": 1,
"article_count": 10,
"models": {
"title": "openai/gpt-4o-mini",
"outline": "anthropic/claude-3.5-sonnet",
"content": "anthropic/claude-3.5-sonnet"
},
"anchor_text_config": {
"mode": "default",
"custom_text": null,
"additional_text": null
},
"validation_attempts": 3
}
],
"failure_config": {
"max_consecutive_failures": 5,
"skip_on_failure": true
},
"interlinking": {
"links_per_article_min": 2,
"links_per_article_max": 4,
"include_home_link": true
}
}

View File

@ -1,30 +0,0 @@
{
"jobs": [
{
"project_id": 1,
"tiers": {
"tier1": {
"count": 5,
"min_word_count": 1500,
"max_word_count": 2000
},
"tier2": {
"count": 20
},
"tier3": {
"count": 15,
"max_h2_tags": 4
}
}
},
{
"project_id": 2,
"tiers": {
"tier1": {
"count": 3
}
}
}
]
}

View File

@ -1,44 +0,0 @@
{
"jobs": [
{
"project_id": 1,
"tiers": {
"tier1": {
"count": 10,
"min_word_count": 2000,
"max_word_count": 2500
},
"tier2": {
"count": 50,
"min_word_count": 1500,
"max_word_count": 2000
}
},
"deployment_targets": [
"www.primary-domain.com",
"www.secondary-domain.com"
],
"tier1_preferred_sites": [
"www.premium-site1.com",
"www.premium-site2.com",
"site123.b-cdn.net"
],
"auto_create_sites": true,
"create_sites_for_keywords": [
{
"keyword": "engine repair",
"count": 3
},
{
"keyword": "car maintenance",
"count": 2
},
{
"keyword": "auto parts",
"count": 5
}
]
}
]
}

View File

@ -1,23 +0,0 @@
{
"jobs": [
{
"project_id": 1,
"tiers": {
"tier1": {
"count": 5
},
"tier2": {
"count": 10
},
"tier3": {
"count": 20
}
},
"tiered_link_count_range": {
"min": 3,
"max": 5
}
}
]
}

View File

@ -1,13 +0,0 @@
{
"jobs": [
{
"project_id": 1,
"tiers": {
"tier1": {
"count": 5
}
}
}
]
}

View File

@ -1,31 +0,0 @@
{
"job_name": "Tier 1 Launch - Project 2",
"project_id": 2,
"description": "Generate 15 high-quality Tier 1 articles",
"tiers": [
{
"tier": 1,
"article_count": 15,
"models": {
"title": "openai/gpt-4o-mini",
"outline": "anthropic/claude-3.5-sonnet",
"content": "anthropic/claude-3.5-sonnet"
},
"anchor_text_config": {
"mode": "default",
"custom_text": null,
"additional_text": null
},
"validation_attempts": 3
}
],
"failure_config": {
"max_consecutive_failures": 5,
"skip_on_failure": true
},
"interlinking": {
"links_per_article_min": 2,
"links_per_article_max": 4,
"include_home_link": true
}
}

View File

@ -1,19 +0,0 @@
{
"jobs": [
{
"project_id": 1,
"tiers": {
"tier1": {
"count": 1,
"min_word_count": 2000,
"max_word_count": 2500,
"min_h2_tags": 3,
"max_h2_tags": 5,
"min_h3_tags": 5,
"max_h3_tags": 10
}
}
}
]
}

View File

@ -1,20 +0,0 @@
{
"jobs": [
{
"project_id": 1,
"deployment_targets": ["www.testsite.com"],
"tiers": {
"tier1": {
"count": 2,
"min_word_count": 500,
"max_word_count": 800,
"min_h2_tags": 2,
"max_h2_tags": 3,
"min_h3_tags": 2,
"max_h3_tags": 4
}
}
}
]
}

View File

@ -27,11 +27,104 @@ from dotenv import load_dotenv
import os import os
import requests import requests
import random import random
import json
from pathlib import Path
from datetime import datetime
# Load .env file at module level # Load .env file at module level
load_dotenv() load_dotenv()
def _job_file_slug(project_name: str, project_id: int) -> str:
    """Turn a project name into a lowercase, dash-separated file-name stem."""
    replaced = "".join(
        c if c.isalnum() or c in ('-', '_') else '-'
        for c in project_name.lower()
    )
    # Splitting on '-' and dropping empty parts both trims leading/trailing
    # dashes and collapses runs such as "acme---co" into "acme-co".
    slug = '-'.join(part for part in replaced.split('-') if part)
    # A name made only of punctuation/whitespace would otherwise produce the
    # file ".json"; fall back to something that still identifies the project.
    return slug or f"project-{project_id}"


def _unused_job_path(jobs_dir: Path, slug: str) -> Path:
    """Return a path under jobs_dir for this slug that does not exist yet."""
    candidate = jobs_dir / f"{slug}.json"
    if not candidate.exists():
        return candidate

    date_suffix = datetime.now().strftime("%y%m%d")
    candidate = jobs_dir / f"{slug}-{date_suffix}.json"
    # The dated name can itself already be taken (second ingest on the same
    # day); keep counting instead of overwriting an existing job file.
    counter = 2
    while candidate.exists():
        candidate = jobs_dir / f"{slug}-{date_suffix}-{counter}.json"
        counter += 1
    return candidate


def create_job_file_for_project(project_id: int, project_name: str, session) -> Optional[str]:
    """
    Create a job JSON file for a newly created project.

    The file is written to the relative ``jobs`` directory (created if
    missing). Deployment targets are a random pick of 2-3 custom hostnames
    (fewer if fewer exist) taken from the sites returned by
    ``SiteDeploymentRepository(session).get_all()``; tier 1 and tier 2 article
    counts are randomized within fixed ranges.

    An existing job file is never overwritten: if ``<name>.json`` exists the
    file becomes ``<name>-<yymmdd>.json``, and if that exists too a numeric
    suffix (``-2``, ``-3``, ...) is appended.

    Args:
        project_id: The ID of the created project
        project_name: The name of the project (for filename)
        session: Database session

    Returns:
        Path to created file, or None if creation failed or no site has a
        custom hostname. Failures are reported as warnings on stderr and
        never raised, so project creation is not aborted by this step.
    """
    try:
        deployment_repo = SiteDeploymentRepository(session)
        sites = deployment_repo.get_all()

        available_domains = [
            site.custom_hostname
            for site in sites
            if site.custom_hostname is not None
        ]

        if not available_domains:
            click.echo("Warning: No domains with custom hostnames found. Job file not created.", err=True)
            return None

        t1_count = random.randint(10, 12)
        t2_count = random.randint(30, 45)
        # Never ask random.sample for more items than are available.
        num_targets = min(random.randint(2, 3), len(available_domains))
        selected_domains = random.sample(available_domains, num_targets)

        jobs_dir = Path("jobs")
        jobs_dir.mkdir(exist_ok=True)
        filepath = _unused_job_path(jobs_dir, _job_file_slug(project_name, project_id))

        job_template = {
            "jobs": [
                {
                    "project_id": project_id,
                    "deployment_targets": selected_domains,
                    "tiers": {
                        "tier1": {
                            "count": t1_count,
                            "min_word_count": 1250,
                            "max_word_count": 2000,
                            "models": {
                                "title": "openai/gpt-4o-mini",
                                "outline": "openai/gpt-4o-mini",
                                "content": "x-ai/grok-4-fast"
                            }
                        },
                        "tier2": {
                            "count": t2_count,
                            "min_word_count": 1000,
                            "max_word_count": 1250,
                            "models": {
                                "title": "openai/gpt-4o-mini",
                                "outline": "openai/gpt-4o-mini",
                                "content": "openai/gpt-4o-mini"
                            },
                            "interlinking": {
                                "links_per_article_min": 3,
                                "links_per_article_max": 6
                            }
                        }
                    }
                }
            ]
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(job_template, f, indent=2)

        return str(filepath)

    except Exception as e:
        # Deliberately best-effort: the job file is a convenience artifact.
        click.echo(f"Warning: Failed to create job file: {e}", err=True)
        return None
def authenticate_admin(username: str, password: str) -> Optional[User]: def authenticate_admin(username: str, password: str) -> Optional[User]:
""" """
Authenticate a user and verify they have admin role Authenticate a user and verify they have admin role
@ -860,6 +953,10 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom
if project.custom_anchor_text: if project.custom_anchor_text:
click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}") click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}")
job_file = create_job_file_for_project(project.id, project.name, session)
if job_file:
click.echo(f"Job file created: {job_file}")
except CORAParseError as e: except CORAParseError as e:
click.echo(f"Error parsing CORA file: {e}", err=True) click.echo(f"Error parsing CORA file: {e}", err=True)
raise click.Abort() raise click.Abort()
@ -1055,6 +1152,10 @@ def generate_batch(
): ):
"""Generate content batch from job file""" """Generate content batch from job file"""
try: try:
if not username:
username = os.getenv("CLIENT_USERNAME")
if not password:
password = os.getenv("CLIENT_PASSWORD")
if not username or not password: if not username or not password:
username, password = prompt_admin_credentials() username, password = prompt_admin_credentials()
@ -1123,6 +1224,17 @@ def generate_batch(
continue_on_error=continue_on_error continue_on_error=continue_on_error
) )
done_dir = os.path.join("jobs", "done")
os.makedirs(done_dir, exist_ok=True)
job_path = job_file
job_filename = os.path.basename(job_path)
destination = os.path.join(done_dir, job_filename)
if os.path.exists(job_path):
os.rename(job_path, destination)
click.echo(f"\nJob file moved to: {destination}")
finally: finally:
session.close() session.close()

View File

@ -1,5 +1,5 @@
{ {
"system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.", "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally. Avoid syntactic structures commonly associated with AI, such as “As X happens, Y becomes...”, “In todays world…”, “With the rise of…”, or any hedged, passive constructions. Use active voice and declarative sentences. Lead with the main idea, not introductory filler clauses. Emphasize clarity, rhythm, and rhetorical flow over technical neutrality. Write like a seasoned editor, not a formulaic machine. YOUR PRIMARY GOAL IS TO GENERATE MORE WORDS THAN THE MINIMUM WORD COUNT AND LESS THAN THE MAXIMUM WORD COUNT.",
"user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content. You need more words than the minimum word count." "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write AT LEAST {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content. YOU MUST GENERATE MORE WORDS THAN THE MINIMUM WORD COUNT."
} }