Remove job files from git tracking and update commands/prompts

2025-11-17 11:07:25 -06:00 · 2025-11-17 11:07:25 -06:00 · d5c501f004
parent 4710677614
commit d5c501f004
14 changed files with 115 additions and 404 deletions
--- a/.gitignore
+++ b/.gitignore
@ -33,6 +33,7 @@ content_automation.db*
 # Generated job files
 jobs/*.json
 jobs/done/
 # Generated content files
 *_project*.html
--- a/jobs/EXAMPLE_MULTI.json
+++ b/jobs/EXAMPLE_MULTI.json
@ -1,123 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 100,
      "models": {
        "title": "anthropic/claude-3.5-sonnet",
        "outline": "anthropic/claude-3.5-sonnet",
        "content": "openai/gpt-4o"
      },
      "deployment_targets": [
        "www.autorepairpro.com",
        "www.carmaintenanceguide.com",
        "www.enginespecialist.net"
      ],
      "tier1_preferred_sites": [
        "www.premium-automotive.com",
        "www.expert-mechanic.org",
        "autorepair123.b-cdn.net",
        "carmaintenance456.b-cdn.net"
      ],
      "auto_create_sites": true,
      "create_sites_for_keywords": [
        {
          "keyword": "engine repair",
          "count": 4
        },
        {
          "keyword": "transmission service",
          "count": 3
        },
        {
          "keyword": "brake system",
          "count": 2
        }
      ],
      "tiered_link_count_range": {
        "min": 3,
        "max": 6
      },
      "tiers": {
        "tier1": {
          "count": 8,
          "min_word_count": 2200,
          "max_word_count": 2800,
          "min_h2_tags": 4,
          "max_h2_tags": 6,
          "min_h3_tags": 6,
          "max_h3_tags": 12
        }
      }
    },
    {
      "project_id": 101,
      "models": {
        "title": "openai/gpt-4o-mini",
        "outline": "openai/gpt-4o",
        "content": "anthropic/claude-3.5-sonnet"
      },
      "deployment_targets": [
        "www.digitalmarketinghub.com",
        "www.seoexperts.org"
      ],
      "tier1_preferred_sites": [
        "www.premium-seo.com",
        "www.marketingmastery.net",
        "seoexpert789.b-cdn.net",
        "digitalmarketing456.b-cdn.net"
      ],
      "auto_create_sites": true,
      "create_sites_for_keywords": [
        {
          "keyword": "SEO optimization",
          "count": 5
        },
        {
          "keyword": "content marketing",
          "count": 4
        },
        {
          "keyword": "social media strategy",
          "count": 3
        },
        {
          "keyword": "email marketing",
          "count": 2
        }
      ],
      "tiered_link_count_range": {
        "min": 2,
        "max": 5
      },
      "tiers": {
        "tier1": {
          "count": 12,
          "min_word_count": 2000,
          "max_word_count": 2500,
          "min_h2_tags": 3,
          "max_h2_tags": 5,
          "min_h3_tags": 5,
          "max_h3_tags": 10
        },
        "tier2": {
          "count": 25,
          "min_word_count": 1500,
          "max_word_count": 2000,
          "min_h2_tags": 2,
          "max_h2_tags": 4,
          "min_h3_tags": 3,
          "max_h3_tags": 8
        },
        "tier3": {
          "count": 40,
          "min_word_count": 1000,
          "max_word_count": 1500,
          "min_h2_tags": 2,
          "max_h2_tags": 3,
          "min_h3_tags": 2,
          "max_h3_tags": 6
        }
      }
    }
  ]
 }
--- a/jobs/example_custom_anchors.json
+++ b/jobs/example_custom_anchors.json
@ -1,37 +0,0 @@
 {
  "job_name": "Custom Anchor Text Test",
  "project_id": 1,
  "description": "Small batch with custom anchor text overrides for testing",
  "tiers": [
    {
      "tier": 1,
      "article_count": 5,
      "models": {
        "title": "anthropic/claude-3.5-sonnet",
        "outline": "anthropic/claude-3.5-sonnet",
        "content": "anthropic/claude-3.5-sonnet"
      },
      "anchor_text_config": {
        "mode": "override",
        "custom_text": [
          "click here for more info",
          "learn more about this topic",
          "discover the best practices",
          "expert guide and resources",
          "comprehensive tutorial"
        ]
      },
      "validation_attempts": 3
    }
  ],
  "failure_config": {
    "max_consecutive_failures": 3,
    "skip_on_failure": true
  },
  "interlinking": {
    "links_per_article_min": 3,
    "links_per_article_max": 3,
    "include_home_link": true
  }
 }
--- a/jobs/example_deployment_targets.json
+++ b/jobs/example_deployment_targets.json
@ -1,24 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 2,
      "deployment_targets": [
        "www.domain1.com",
        "www.domain2.com",
        "www.domain3.com"
      ],
      "tiers": {
        "tier1": {
          "count": 10,
          "min_word_count": 2000,
          "max_word_count": 2500,
          "min_h2_tags": 3,
          "max_h2_tags": 5,
          "min_h3_tags": 5,
          "max_h3_tags": 10
        }
      }
    }
  ]
 }
--- a/jobs/example_multi_site_deployment.json
+++ b/jobs/example_multi_site_deployment.json
@ -1,38 +0,0 @@
 {
  "job_name": "Multi-Site T1 Launch - 10 Articles Across 3 Sites",
  "project_id": 2,
  "description": "Generate 10 Tier 1 articles distributed across 3 deployment sites",
  "deployment_targets": [
    "www.domain1.com",
    "www.domain2.com",
    "www.domain3.com"
  ],
  "deployment_overflow": "round_robin",
  "tiers": [
    {
      "tier": 1,
      "article_count": 10,
      "models": {
        "title": "openai/gpt-4o-mini",
        "outline": "anthropic/claude-3.5-sonnet",
        "content": "anthropic/claude-3.5-sonnet"
      },
      "anchor_text_config": {
        "mode": "default",
        "custom_text": null,
        "additional_text": null
      },
      "validation_attempts": 3
    }
  ],
  "failure_config": {
    "max_consecutive_failures": 5,
    "skip_on_failure": true
  },
  "interlinking": {
    "links_per_article_min": 2,
    "links_per_article_max": 4,
    "include_home_link": true
  }
 }
--- a/jobs/example_multi_tier_batch.json
+++ b/jobs/example_multi_tier_batch.json
@ -1,30 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 1,
      "tiers": {
        "tier1": {
          "count": 5,
          "min_word_count": 1500,
          "max_word_count": 2000
        },
        "tier2": {
          "count": 20
        },
        "tier3": {
          "count": 15,
          "max_h2_tags": 4
        }
      }
    },
    {
      "project_id": 2,
      "tiers": {
        "tier1": {
          "count": 3
        }
      }
    }
  ]
 }
--- a/jobs/example_story_3.1_full_features.json
+++ b/jobs/example_story_3.1_full_features.json
@ -1,44 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 1,
      "tiers": {
        "tier1": {
          "count": 10,
          "min_word_count": 2000,
          "max_word_count": 2500
        },
        "tier2": {
          "count": 50,
          "min_word_count": 1500,
          "max_word_count": 2000
        }
      },
      "deployment_targets": [
        "www.primary-domain.com",
        "www.secondary-domain.com"
      ],
      "tier1_preferred_sites": [
        "www.premium-site1.com",
        "www.premium-site2.com",
        "site123.b-cdn.net"
      ],
      "auto_create_sites": true,
      "create_sites_for_keywords": [
        {
          "keyword": "engine repair",
          "count": 3
        },
        {
          "keyword": "car maintenance",
          "count": 2
        },
        {
          "keyword": "auto parts",
          "count": 5
        }
      ]
    }
  ]
 }
--- a/jobs/example_story_3.2_tiered_links.json
+++ b/jobs/example_story_3.2_tiered_links.json
@ -1,23 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 1,
      "tiers": {
        "tier1": {
          "count": 5
        },
        "tier2": {
          "count": 10
        },
        "tier3": {
          "count": 20
        }
      },
      "tiered_link_count_range": {
        "min": 3,
        "max": 5
      }
    }
  ]
 }
--- a/jobs/example_tier1_batch.json
+++ b/jobs/example_tier1_batch.json
@ -1,13 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 1,
      "tiers": {
        "tier1": {
          "count": 5
        }
      }
    }
  ]
 }
--- a/jobs/project_1_tier1_5articles.json
+++ b/jobs/project_1_tier1_5articles.json
@ -1,31 +0,0 @@
 {
  "job_name": "Tier 1 Launch - Project 2",
  "project_id": 2,
  "description": "Generate 15 high-quality Tier 1 articles",
  "tiers": [
    {
      "tier": 1,
      "article_count": 15,
      "models": {
        "title": "openai/gpt-4o-mini",
        "outline": "anthropic/claude-3.5-sonnet",
        "content": "anthropic/claude-3.5-sonnet"
      },
      "anchor_text_config": {
        "mode": "default",
        "custom_text": null,
        "additional_text": null
      },
      "validation_attempts": 3
    }
  ],
  "failure_config": {
    "max_consecutive_failures": 5,
    "skip_on_failure": true
  },
  "interlinking": {
    "links_per_article_min": 2,
    "links_per_article_max": 4,
    "include_home_link": true
  }
 }
--- a/jobs/test_augmentation.json
+++ b/jobs/test_augmentation.json
@ -1,19 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 1,
      "tiers": {
        "tier1": {
          "count": 1,
          "min_word_count": 2000,
          "max_word_count": 2500,
          "min_h2_tags": 3,
          "max_h2_tags": 5,
          "min_h3_tags": 5,
          "max_h3_tags": 10
        }
      }
    }
  ]
 }
--- a/jobs/test_integration.json
+++ b/jobs/test_integration.json
@ -1,20 +0,0 @@
 {
  "jobs": [
    {
      "project_id": 1,
      "deployment_targets": ["www.testsite.com"],
      "tiers": {
        "tier1": {
          "count": 2,
          "min_word_count": 500,
          "max_word_count": 800,
          "min_h2_tags": 2,
          "max_h2_tags": 3,
          "min_h3_tags": 2,
          "max_h3_tags": 4
        }
      }
    }
  ]
 }
--- a/src/cli/commands.py
+++ b/src/cli/commands.py
@ -27,11 +27,104 @@ from dotenv import load_dotenv
 import os
 import requests
 import random
 import json
 from pathlib import Path
 from datetime import datetime
 # Load .env file at module level
 load_dotenv()
 def create_job_file_for_project(project_id: int, project_name: str, session) -> Optional[str]:
    """
    Create a job JSON file under ``jobs/`` for a newly created project.

    The job targets a random selection of 2-3 domains (fewer if fewer are
    available) taken from the site deployments that have a custom hostname,
    and defines a tier1 batch of 10-12 articles and a tier2 batch of 30-45
    articles.

    The filename is derived from the project name. If ``<name>.json`` already
    exists, ``<name>-<yymmdd>.json`` is used; if that exists as well, a numeric
    suffix (``-2``, ``-3``, ...) is appended so an existing job file is never
    overwritten.

    Args:
        project_id: The ID of the created project
        project_name: The name of the project (used to build the filename)
        session: Database session handed to SiteDeploymentRepository

    Returns:
        Path to the created file as a string, or None if no domain with a
        custom hostname exists or if creation failed for any reason
    """
    try:
        deployment_repo = SiteDeploymentRepository(session)
        available_domains = [
            site.custom_hostname
            for site in deployment_repo.get_all()
            if site.custom_hostname is not None
        ]
        if not available_domains:
            click.echo("Warning: No domains with custom hostnames found. Job file not created.", err=True)
            return None

        t1_count = random.randint(10, 12)
        t2_count = random.randint(30, 45)
        # random.sample raises if asked for more items than exist, so cap the
        # requested number of targets at the number of available domains.
        num_targets = min(random.randint(2, 3), len(available_domains))
        selected_domains = random.sample(available_domains, num_targets)

        # Keep alphanumerics, '-' and '_'; every other character (including
        # whitespace) becomes '-', then leading/trailing dashes are trimmed.
        sanitized_name = "".join(
            c if c.isalnum() or c in ('-', '_') else '-'
            for c in project_name.lower()
        ).strip('-')
        if not sanitized_name:
            # A name made only of punctuation/whitespace would otherwise
            # produce the hidden file "jobs/.json".
            sanitized_name = f"project-{project_id}"

        jobs_dir = Path("jobs")
        jobs_dir.mkdir(exist_ok=True)

        filepath = jobs_dir / f"{sanitized_name}.json"
        if filepath.exists():
            date_suffix = datetime.now().strftime("%y%m%d")
            dated_stem = f"{sanitized_name}-{date_suffix}"
            filepath = jobs_dir / f"{dated_stem}.json"
            # The dated name can itself be taken (same project name ingested
            # twice in one day); keep counting instead of overwriting it.
            counter = 2
            while filepath.exists():
                filepath = jobs_dir / f"{dated_stem}-{counter}.json"
                counter += 1

        job_template = {
            "jobs": [
                {
                    "project_id": project_id,
                    "deployment_targets": selected_domains,
                    "tiers": {
                        "tier1": {
                            "count": t1_count,
                            "min_word_count": 1250,
                            "max_word_count": 2000,
                            "models": {
                                "title": "openai/gpt-4o-mini",
                                "outline": "openai/gpt-4o-mini",
                                "content": "x-ai/grok-4-fast"
                            }
                        },
                        "tier2": {
                            "count": t2_count,
                            "min_word_count": 1000,
                            "max_word_count": 1250,
                            "models": {
                                "title": "openai/gpt-4o-mini",
                                "outline": "openai/gpt-4o-mini",
                                "content": "openai/gpt-4o-mini"
                            },
                            "interlinking": {
                                "links_per_article_min": 3,
                                "links_per_article_max": 6
                            }
                        }
                    }
                }
            ]
        }

        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(job_template, f, indent=2)
        return str(filepath)
    except Exception as e:
        # Best-effort: job file creation must not abort the calling command,
        # so any failure is reported as a warning and signalled with None.
        click.echo(f"Warning: Failed to create job file: {e}", err=True)
        return None
 def authenticate_admin(username: str, password: str) -> Optional[User]:
    """
    Authenticate a user and verify they have admin role
@ -860,6 +953,10 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom
            if project.custom_anchor_text:
                click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}")
            job_file = create_job_file_for_project(project.id, project.name, session)
            if job_file:
                click.echo(f"Job file created: {job_file}")
        except CORAParseError as e:
            click.echo(f"Error parsing CORA file: {e}", err=True)
            raise click.Abort()
@ -1055,6 +1152,10 @@ def generate_batch(
 ):
    """Generate content batch from job file"""
    try:
        if not username:
            username = os.getenv("CLIENT_USERNAME")
        if not password:
            password = os.getenv("CLIENT_PASSWORD")
        if not username or not password:
            username, password = prompt_admin_credentials()
@ -1123,6 +1224,17 @@ def generate_batch(
                continue_on_error=continue_on_error
            )
            done_dir = os.path.join("jobs", "done")
            os.makedirs(done_dir, exist_ok=True)
            job_path = job_file
            job_filename = os.path.basename(job_path)
            destination = os.path.join(done_dir, job_filename)
            if os.path.exists(job_path):
                os.rename(job_path, destination)
                click.echo(f"\nJob file moved to: {destination}")
        finally:
            session.close()
--- a/src/generation/prompts/content_generation.json
+++ b/src/generation/prompts/content_generation.json
@ -1,5 +1,5 @@
 {
-  "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally.",
+  "system_message": "You are an expert content writer who creates engaging, informative, and SEO-optimized articles that provide real value to readers while incorporating relevant keywords naturally. Avoid syntactic structures commonly associated with AI, such as “As X happens, Y becomes...”, “In today’s world…”, “With the rise of…”, or any hedged, passive constructions. Use active voice and declarative sentences. Lead with the main idea, not introductory filler clauses. Emphasize clarity, rhythm, and rhetorical flow over technical neutrality. Write like a seasoned editor, not a formulaic machine. YOUR PRIMARY GOAL IS TO GENERATE MORE WORDS THAN THE MINIMUM WORD COUNT AND LESS THAN THE MAXIMUM WORD COUNT.",
-  "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write approximately {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content. You need more words than the minimum word count."
+  "user_prompt": "Write a complete article based on:\nTitle: {title}\nOutline: {outline}\nKeyword: {keyword}\n\nEntities to include naturally: {entities}\nRelated searches to address: {related_searches}\n\nTarget word count range: {min_word_count} to {max_word_count} words.\n\nIMPORTANT: Write AT LEAST {words_per_section} words per H3 section to meet the target word count. Be thorough and substantive in each section.\n\nReturn as an HTML fragment with <h2>, <h3>, and <p> tags. Do NOT include <!DOCTYPE>, <html>, <head>, or <body> tags. Start directly with the first <h2> heading.\n\nWrite naturally and informatively. Incorporate the keyword, entities, and related searches organically throughout the content. YOU MUST GENERATE MORE WORDS THAN THE MINIMUM WORD COUNT."
 }