From d5c501f0046df8032b8acbfe51c1c9ae79505862 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Mon, 17 Nov 2025 11:07:25 -0600 Subject: [PATCH] Remove job files from git tracking and update commands/prompts --- .gitignore | 1 + jobs/EXAMPLE_MULTI.json | 123 ------------------ jobs/example_custom_anchors.json | 37 ------ jobs/example_deployment_targets.json | 24 ---- jobs/example_multi_site_deployment.json | 38 ------ jobs/example_multi_tier_batch.json | 30 ----- jobs/example_story_3.1_full_features.json | 44 ------- jobs/example_story_3.2_tiered_links.json | 23 ---- jobs/example_tier1_batch.json | 13 -- jobs/project_1_tier1_5articles.json | 31 ----- jobs/test_augmentation.json | 19 --- jobs/test_integration.json | 20 --- src/cli/commands.py | 112 ++++++++++++++++ .../prompts/content_generation.json | 4 +- 14 files changed, 115 insertions(+), 404 deletions(-) delete mode 100644 jobs/EXAMPLE_MULTI.json delete mode 100644 jobs/example_custom_anchors.json delete mode 100644 jobs/example_deployment_targets.json delete mode 100644 jobs/example_multi_site_deployment.json delete mode 100644 jobs/example_multi_tier_batch.json delete mode 100644 jobs/example_story_3.1_full_features.json delete mode 100644 jobs/example_story_3.2_tiered_links.json delete mode 100644 jobs/example_tier1_batch.json delete mode 100644 jobs/project_1_tier1_5articles.json delete mode 100644 jobs/test_augmentation.json delete mode 100644 jobs/test_integration.json diff --git a/.gitignore b/.gitignore index d2fdc11..1901d7f 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ content_automation.db* # Generated job files jobs/*.json +jobs/done/ # Generated content files *_project*.html diff --git a/jobs/EXAMPLE_MULTI.json b/jobs/EXAMPLE_MULTI.json deleted file mode 100644 index 6fe797e..0000000 --- a/jobs/EXAMPLE_MULTI.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "jobs": [ - { - "project_id": 100, - "models": { - "title": "anthropic/claude-3.5-sonnet", - "outline": "anthropic/claude-3.5-sonnet", - 
"content": "openai/gpt-4o" - }, - "deployment_targets": [ - "www.autorepairpro.com", - "www.carmaintenanceguide.com", - "www.enginespecialist.net" - ], - "tier1_preferred_sites": [ - "www.premium-automotive.com", - "www.expert-mechanic.org", - "autorepair123.b-cdn.net", - "carmaintenance456.b-cdn.net" - ], - "auto_create_sites": true, - "create_sites_for_keywords": [ - { - "keyword": "engine repair", - "count": 4 - }, - { - "keyword": "transmission service", - "count": 3 - }, - { - "keyword": "brake system", - "count": 2 - } - ], - "tiered_link_count_range": { - "min": 3, - "max": 6 - }, - "tiers": { - "tier1": { - "count": 8, - "min_word_count": 2200, - "max_word_count": 2800, - "min_h2_tags": 4, - "max_h2_tags": 6, - "min_h3_tags": 6, - "max_h3_tags": 12 - } - } - }, - { - "project_id": 101, - "models": { - "title": "openai/gpt-4o-mini", - "outline": "openai/gpt-4o", - "content": "anthropic/claude-3.5-sonnet" - }, - "deployment_targets": [ - "www.digitalmarketinghub.com", - "www.seoexperts.org" - ], - "tier1_preferred_sites": [ - "www.premium-seo.com", - "www.marketingmastery.net", - "seoexpert789.b-cdn.net", - "digitalmarketing456.b-cdn.net" - ], - "auto_create_sites": true, - "create_sites_for_keywords": [ - { - "keyword": "SEO optimization", - "count": 5 - }, - { - "keyword": "content marketing", - "count": 4 - }, - { - "keyword": "social media strategy", - "count": 3 - }, - { - "keyword": "email marketing", - "count": 2 - } - ], - "tiered_link_count_range": { - "min": 2, - "max": 5 - }, - "tiers": { - "tier1": { - "count": 12, - "min_word_count": 2000, - "max_word_count": 2500, - "min_h2_tags": 3, - "max_h2_tags": 5, - "min_h3_tags": 5, - "max_h3_tags": 10 - }, - "tier2": { - "count": 25, - "min_word_count": 1500, - "max_word_count": 2000, - "min_h2_tags": 2, - "max_h2_tags": 4, - "min_h3_tags": 3, - "max_h3_tags": 8 - }, - "tier3": { - "count": 40, - "min_word_count": 1000, - "max_word_count": 1500, - "min_h2_tags": 2, - "max_h2_tags": 3, - "min_h3_tags": 2, 
- "max_h3_tags": 6 - } - } - } - ] -} diff --git a/jobs/example_custom_anchors.json b/jobs/example_custom_anchors.json deleted file mode 100644 index 639a6e4..0000000 --- a/jobs/example_custom_anchors.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "job_name": "Custom Anchor Text Test", - "project_id": 1, - "description": "Small batch with custom anchor text overrides for testing", - "tiers": [ - { - "tier": 1, - "article_count": 5, - "models": { - "title": "anthropic/claude-3.5-sonnet", - "outline": "anthropic/claude-3.5-sonnet", - "content": "anthropic/claude-3.5-sonnet" - }, - "anchor_text_config": { - "mode": "override", - "custom_text": [ - "click here for more info", - "learn more about this topic", - "discover the best practices", - "expert guide and resources", - "comprehensive tutorial" - ] - }, - "validation_attempts": 3 - } - ], - "failure_config": { - "max_consecutive_failures": 3, - "skip_on_failure": true - }, - "interlinking": { - "links_per_article_min": 3, - "links_per_article_max": 3, - "include_home_link": true - } -} - diff --git a/jobs/example_deployment_targets.json b/jobs/example_deployment_targets.json deleted file mode 100644 index 610a61c..0000000 --- a/jobs/example_deployment_targets.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "jobs": [ - { - "project_id": 2, - "deployment_targets": [ - "www.domain1.com", - "www.domain2.com", - "www.domain3.com" - ], - "tiers": { - "tier1": { - "count": 10, - "min_word_count": 2000, - "max_word_count": 2500, - "min_h2_tags": 3, - "max_h2_tags": 5, - "min_h3_tags": 5, - "max_h3_tags": 10 - } - } - } - ] -} - diff --git a/jobs/example_multi_site_deployment.json b/jobs/example_multi_site_deployment.json deleted file mode 100644 index 1f99f77..0000000 --- a/jobs/example_multi_site_deployment.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "job_name": "Multi-Site T1 Launch - 10 Articles Across 3 Sites", - "project_id": 2, - "description": "Generate 10 Tier 1 articles distributed across 3 deployment sites", - 
"deployment_targets": [ - "www.domain1.com", - "www.domain2.com", - "www.domain3.com" - ], - "deployment_overflow": "round_robin", - "tiers": [ - { - "tier": 1, - "article_count": 10, - "models": { - "title": "openai/gpt-4o-mini", - "outline": "anthropic/claude-3.5-sonnet", - "content": "anthropic/claude-3.5-sonnet" - }, - "anchor_text_config": { - "mode": "default", - "custom_text": null, - "additional_text": null - }, - "validation_attempts": 3 - } - ], - "failure_config": { - "max_consecutive_failures": 5, - "skip_on_failure": true - }, - "interlinking": { - "links_per_article_min": 2, - "links_per_article_max": 4, - "include_home_link": true - } -} - diff --git a/jobs/example_multi_tier_batch.json b/jobs/example_multi_tier_batch.json deleted file mode 100644 index 6b9c969..0000000 --- a/jobs/example_multi_tier_batch.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "jobs": [ - { - "project_id": 1, - "tiers": { - "tier1": { - "count": 5, - "min_word_count": 1500, - "max_word_count": 2000 - }, - "tier2": { - "count": 20 - }, - "tier3": { - "count": 15, - "max_h2_tags": 4 - } - } - }, - { - "project_id": 2, - "tiers": { - "tier1": { - "count": 3 - } - } - } - ] -} - diff --git a/jobs/example_story_3.1_full_features.json b/jobs/example_story_3.1_full_features.json deleted file mode 100644 index 52d7947..0000000 --- a/jobs/example_story_3.1_full_features.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "jobs": [ - { - "project_id": 1, - "tiers": { - "tier1": { - "count": 10, - "min_word_count": 2000, - "max_word_count": 2500 - }, - "tier2": { - "count": 50, - "min_word_count": 1500, - "max_word_count": 2000 - } - }, - "deployment_targets": [ - "www.primary-domain.com", - "www.secondary-domain.com" - ], - "tier1_preferred_sites": [ - "www.premium-site1.com", - "www.premium-site2.com", - "site123.b-cdn.net" - ], - "auto_create_sites": true, - "create_sites_for_keywords": [ - { - "keyword": "engine repair", - "count": 3 - }, - { - "keyword": "car maintenance", - "count": 2 - }, - { - 
"keyword": "auto parts", - "count": 5 - } - ] - } - ] -} - diff --git a/jobs/example_story_3.2_tiered_links.json b/jobs/example_story_3.2_tiered_links.json deleted file mode 100644 index 646b66f..0000000 --- a/jobs/example_story_3.2_tiered_links.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "jobs": [ - { - "project_id": 1, - "tiers": { - "tier1": { - "count": 5 - }, - "tier2": { - "count": 10 - }, - "tier3": { - "count": 20 - } - }, - "tiered_link_count_range": { - "min": 3, - "max": 5 - } - } - ] -} - diff --git a/jobs/example_tier1_batch.json b/jobs/example_tier1_batch.json deleted file mode 100644 index 810fb80..0000000 --- a/jobs/example_tier1_batch.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "jobs": [ - { - "project_id": 1, - "tiers": { - "tier1": { - "count": 5 - } - } - } - ] -} - diff --git a/jobs/project_1_tier1_5articles.json b/jobs/project_1_tier1_5articles.json deleted file mode 100644 index e77b581..0000000 --- a/jobs/project_1_tier1_5articles.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "job_name": "Tier 1 Launch - Project 2", - "project_id": 2, - "description": "Generate 15 high-quality Tier 1 articles", - "tiers": [ - { - "tier": 1, - "article_count": 15, - "models": { - "title": "openai/gpt-4o-mini", - "outline": "anthropic/claude-3.5-sonnet", - "content": "anthropic/claude-3.5-sonnet" - }, - "anchor_text_config": { - "mode": "default", - "custom_text": null, - "additional_text": null - }, - "validation_attempts": 3 - } - ], - "failure_config": { - "max_consecutive_failures": 5, - "skip_on_failure": true - }, - "interlinking": { - "links_per_article_min": 2, - "links_per_article_max": 4, - "include_home_link": true - } -} \ No newline at end of file diff --git a/jobs/test_augmentation.json b/jobs/test_augmentation.json deleted file mode 100644 index 9468b97..0000000 --- a/jobs/test_augmentation.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "jobs": [ - { - "project_id": 1, - "tiers": { - "tier1": { - "count": 1, - "min_word_count": 2000, - "max_word_count": 2500, - 
"min_h2_tags": 3, - "max_h2_tags": 5, - "min_h3_tags": 5, - "max_h3_tags": 10 - } - } - } - ] -} - diff --git a/jobs/test_integration.json b/jobs/test_integration.json deleted file mode 100644 index 701a7b0..0000000 --- a/jobs/test_integration.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "jobs": [ - { - "project_id": 1, - "deployment_targets": ["www.testsite.com"], - "tiers": { - "tier1": { - "count": 2, - "min_word_count": 500, - "max_word_count": 800, - "min_h2_tags": 2, - "max_h2_tags": 3, - "min_h3_tags": 2, - "max_h3_tags": 4 - } - } - } - ] -} - diff --git a/src/cli/commands.py b/src/cli/commands.py index b9547fa..fb20a6a 100644 --- a/src/cli/commands.py +++ b/src/cli/commands.py @@ -27,11 +27,104 @@ from dotenv import load_dotenv import os import requests import random +import json +from pathlib import Path +from datetime import datetime # Load .env file at module level load_dotenv() +def create_job_file_for_project(project_id: int, project_name: str, session) -> Optional[str]: + """ + Create a job JSON file for a newly created project. + + Args: + project_id: The ID of the created project + project_name: The name of the project (for filename) + session: Database session + + Returns: + Path to created file, or None if creation failed + """ + try: + deployment_repo = SiteDeploymentRepository(session) + sites = deployment_repo.get_all() + + available_domains = [ + site.custom_hostname + for site in sites + if site.custom_hostname is not None + ] + + if not available_domains: + click.echo("Warning: No domains with custom hostnames found. 
Job file not created.", err=True) + return None + + t1_count = random.randint(10, 12) + t2_count = random.randint(30, 45) + num_targets = min(random.randint(2, 3), len(available_domains)) + selected_domains = random.sample(available_domains, num_targets) + + sanitized_name = "".join(c if c.isalnum() or c in ('-', '_') else '-' for c in project_name.lower()).strip('-') + sanitized_name = '-'.join(sanitized_name.split()) + + jobs_dir = Path("jobs") + jobs_dir.mkdir(exist_ok=True) + + base_filename = f"{sanitized_name}.json" + filepath = jobs_dir / base_filename + + if filepath.exists(): + date_suffix = datetime.now().strftime("%y%m%d") + base_filename = f"{sanitized_name}-{date_suffix}.json" + filepath = jobs_dir / base_filename + + job_template = { + "jobs": [ + { + "project_id": project_id, + "deployment_targets": selected_domains, + "tiers": { + "tier1": { + "count": t1_count, + "min_word_count": 1250, + "max_word_count": 2000, + "models": { + "title": "openai/gpt-4o-mini", + "outline": "openai/gpt-4o-mini", + "content": "x-ai/grok-4-fast" + } + }, + "tier2": { + "count": t2_count, + "min_word_count": 1000, + "max_word_count": 1250, + "models": { + "title": "openai/gpt-4o-mini", + "outline": "openai/gpt-4o-mini", + "content": "openai/gpt-4o-mini" + }, + "interlinking": { + "links_per_article_min": 3, + "links_per_article_max": 6 + } + } + } + } + ] + } + + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(job_template, f, indent=2) + + return str(filepath) + + except Exception as e: + click.echo(f"Warning: Failed to create job file: {e}", err=True) + return None + + def authenticate_admin(username: str, password: str) -> Optional[User]: """ Authenticate a user and verify they have admin role @@ -860,6 +953,10 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom if project.custom_anchor_text: click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}") + job_file = create_job_file_for_project(project.id, 
project.name, session) + if job_file: + click.echo(f"Job file created: {job_file}") + except CORAParseError as e: click.echo(f"Error parsing CORA file: {e}", err=True) raise click.Abort() @@ -1055,6 +1152,10 @@ def generate_batch( ): """Generate content batch from job file""" try: + if not username: + username = os.getenv("CLIENT_USERNAME") + if not password: + password = os.getenv("CLIENT_PASSWORD") if not username or not password: username, password = prompt_admin_credentials() @@ -1123,6 +1224,17 @@ def generate_batch( continue_on_error=continue_on_error ) + done_dir = os.path.join("jobs", "done") + os.makedirs(done_dir, exist_ok=True) + + job_path = job_file + job_filename = os.path.basename(job_path) + destination = os.path.join(done_dir, job_filename) + + if os.path.exists(job_path): + os.rename(job_path, destination) + click.echo(f"\nJob file moved to: {destination}") + finally: session.close() diff --git a/src/generation/prompts/content_generation.json b/src/generation/prompts/content_generation.json index bcc5755..a3b7f7a 100644 --- a/src/generation/prompts/content_generation.json +++ b/src/generation/prompts/content_generation.json @@ -1,5 +1,5 @@ { - "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.", - "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with

<h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2>
heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content. You need more words than the minimum word count." + "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally. Avoid syntactic structures commonly associated with AI, such as “As X happens, Y becomes...”, “In today’s world…”, “With the rise of…”, or any hedged, passive constructions. Use active voice and declarative sentences. Lead with the main idea, not introductory filler clauses. Emphasize clarity, rhythm, and rhetorical flow over technical neutrality. Write like a seasoned editor, not a formulaic machine. YOUR PRIMARY GOAL IS TO GENERATE MORE WORDS THAN THE MINIMUM WORD COUNT AND LESS THAN THE MAXIMUM WORD COUNT.", + "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write AT LEAST {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with

<h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2>
heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content. YOU MUST GENERATE MORE WORDS THAN THE MINIMUM WORD COUNT." }