Fixed: throw an error early when money_site_url is not set.
parent
3649e88f44
commit
26b6e75448
|
|
@ -157,7 +157,8 @@ TIER_DEFAULTS = {
|
||||||
```python
|
```python
|
||||||
AVAILABLE_MODELS = {
|
AVAILABLE_MODELS = {
|
||||||
"gpt-4o-mini": "openai/gpt-4o-mini",
|
"gpt-4o-mini": "openai/gpt-4o-mini",
|
||||||
"claude-sonnet-4.5": "anthropic/claude-3.5-sonnet"
|
"claude-sonnet-4.5": "anthropic/claude-3.5-sonnet",
|
||||||
|
# Many others are available - check the OpenRouter API model list for more
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -488,6 +489,9 @@ class BatchProcessor:
|
||||||
Process all jobs in job file
|
Process all jobs in job file
|
||||||
|
|
||||||
For each job:
|
For each job:
|
||||||
|
0. Validate project configuration (fail fast if invalid)
|
||||||
|
- Check project exists
|
||||||
|
- Validate money_site_url is set (required for tiered linking strategy)
|
||||||
For each tier:
|
For each tier:
|
||||||
For count times:
|
For count times:
|
||||||
1. Generate title (log to console)
|
1. Generate title (log to console)
|
||||||
|
|
@ -530,6 +534,9 @@ Summary:
|
||||||
**File**: `src/generation/batch_processor.py`
|
**File**: `src/generation/batch_processor.py`
|
||||||
|
|
||||||
**Error handling strategy**:
|
**Error handling strategy**:
|
||||||
|
- Project validation errors: Fail fast before generation starts
|
||||||
|
- Missing project: Abort with clear error
|
||||||
|
- Missing money_site_url: Abort with clear error (required for all jobs)
|
||||||
- AI API errors: Log error, mark as `status='failed'`, save to DB
|
- AI API errors: Log error, mark as `status='failed'`, save to DB
|
||||||
- If `continue_on_error=True`: continue to next article
|
- If `continue_on_error=True`: continue to next article
|
||||||
- If `continue_on_error=False`: stop batch processing
|
- If `continue_on_error=False`: stop batch processing
|
||||||
|
|
|
||||||
|
|
@ -602,7 +602,12 @@ Generate `index.html` for each site with:
|
||||||
**Dependencies**: Story 3.4 (boilerplate page infrastructure)
|
**Dependencies**: Story 3.4 (boilerplate page infrastructure)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
### www vs root in domain imports
|
||||||
|
#### Problem
|
||||||
|
Domains are stored in the table as either www.domain.com or domain.com, but a lookup that uses the other form through any of the scripts (e.g. main.py get-site or a job.json import) will fail.
|
||||||
|
|
||||||
|
#### Solution
|
||||||
|
Partial match on search? Search for both the www and root forms in the lookup logic? Just ideas; they haven't been fleshed out yet.
|
||||||
## Future Sections
|
## Future Sections
|
||||||
|
|
||||||
Add new technical debt items below as they're identified during development.
|
Add new technical debt items below as they're identified during development.
|
||||||
|
|
|
||||||
|
|
@ -5,11 +5,11 @@
|
||||||
"tiers": {
|
"tiers": {
|
||||||
"tier1": {
|
"tier1": {
|
||||||
"count": 5,
|
"count": 5,
|
||||||
"min_word_count": 2200,
|
"min_word_count": 1500,
|
||||||
"max_word_count": 2600
|
"max_word_count": 2000
|
||||||
},
|
},
|
||||||
"tier2": {
|
"tier2": {
|
||||||
"count": 10
|
"count": 20
|
||||||
},
|
},
|
||||||
"tier3": {
|
"tier3": {
|
||||||
"count": 15,
|
"count": 15,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,210 @@
|
||||||
|
"""
|
||||||
|
Post-process existing articles that were generated but not fully processed.
|
||||||
|
|
||||||
|
This script applies post-processing steps to articles that are already in the database:
|
||||||
|
- Site assignment (if needed)
|
||||||
|
- URL generation
|
||||||
|
- Tiered link discovery
|
||||||
|
- Interlink injection
|
||||||
|
- Template application
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
uv run python scripts/post_process_batch.py --project-id 1 --tier tier1 --tier tier2
|
||||||
|
uv run python scripts/post_process_batch.py --project-id 1 --all-tiers
|
||||||
|
"""
|
||||||
|
|
||||||
|
import click
|
||||||
|
import os
|
||||||
|
from src.database.session import db_manager
|
||||||
|
from src.database.repositories import (
|
||||||
|
GeneratedContentRepository,
|
||||||
|
ProjectRepository,
|
||||||
|
SiteDeploymentRepository,
|
||||||
|
ArticleLinkRepository,
|
||||||
|
SitePageRepository
|
||||||
|
)
|
||||||
|
from src.generation.ai_client import AIClient, PromptManager
|
||||||
|
from src.generation.service import ContentGenerator
|
||||||
|
from src.generation.url_generator import generate_urls_for_batch
|
||||||
|
from src.generation.site_assignment import assign_sites_to_batch
|
||||||
|
from src.generation.job_config import Job, InterlinkingConfig
|
||||||
|
from src.interlinking.tiered_links import find_tiered_links
|
||||||
|
from src.interlinking.content_injection import inject_interlinks
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
@click.option('--project-id', '-p', required=True, type=int, help='Project ID to post-process')
@click.option('--tier', '-t', multiple=True, help='Tiers to process (e.g., tier1, tier2)')
@click.option('--all-tiers', is_flag=True, help='Process all tiers')
@click.option('--skip-site-assignment', is_flag=True, help='Skip site assignment step')
@click.option('--skip-urls', is_flag=True, help='Skip URL generation')
@click.option('--skip-interlinks', is_flag=True, help='Skip interlinking')
@click.option('--skip-templates', is_flag=True, help='Skip template application')
def post_process_batch(
    project_id: int,
    tier: tuple,
    all_tiers: bool,
    skip_site_assignment: bool,
    skip_urls: bool,
    skip_interlinks: bool,
    skip_templates: bool
):
    """Post-process existing articles in the database"""
    # NOTE: the docstring above is what click shows for --help, so it is kept
    # short; developer notes live in comments instead.
    #
    # Per selected tier the command runs, in order: site assignment (for
    # articles lacking a site_deployment_id), URL generation, tiered-link
    # discovery + interlink injection, and template application. Each step
    # can be disabled with its --skip-* flag.
    #
    # Validation failures raise click.ClickException, which prints
    # "Error: <message>" to stderr and exits with a non-zero status so that
    # shells and calling scripts can detect the failure.

    if not tier and not all_tiers:
        raise click.ClickException("Must specify either --tier or --all-tiers")

    session = db_manager.get_session()

    try:
        project_repo = ProjectRepository(session)
        content_repo = GeneratedContentRepository(session)
        site_repo = SiteDeploymentRepository(session)
        link_repo = ArticleLinkRepository(session)

        # Fail fast on an unusable project before doing any work.
        project = project_repo.get_by_id(project_id)
        if not project:
            raise click.ClickException(f"Project {project_id} not found")

        if not project.money_site_url:
            raise click.ClickException(
                f"Project {project_id} has no money_site_url set\n"
                "Please set money_site_url before post-processing"
            )

        click.echo(f"\nPost-processing project: {project.name} (ID: {project_id})")
        click.echo(f"Keyword: {project.main_keyword}")
        click.echo(f"Money site: {project.money_site_url}\n")

        # --all-tiers takes precedence over any explicit --tier values and
        # discovers the tier names from the project's stored articles.
        if all_tiers:
            all_articles = content_repo.get_by_project_id(project_id)
            tiers_to_process = sorted({a.tier for a in all_articles})
            click.echo(f"Found tiers: {', '.join(tiers_to_process)}\n")
        else:
            tiers_to_process = list(tier)

        api_key = os.getenv("OPENROUTER_API_KEY")
        if not api_key:
            raise click.ClickException("OPENROUTER_API_KEY not found in environment")

        ai_client = AIClient(api_key=api_key, model='gpt-4o-mini')
        prompt_manager = PromptManager()
        content_generator = ContentGenerator(
            ai_client=ai_client,
            prompt_manager=prompt_manager,
            project_repo=project_repo,
            content_repo=content_repo,
            site_deployment_repo=site_repo
        )

        # Minimal job object: no tier configuration, only the interlinking
        # settings that the assignment / linking helpers are handed below.
        job = Job(
            project_id=project_id,
            tiers={},
            interlinking=InterlinkingConfig(
                links_per_article_min=2,
                links_per_article_max=4,
                include_home_link=True
            )
        )

        for tier_name in tiers_to_process:
            click.echo(f"Processing {tier_name}...")

            all_articles = content_repo.get_by_project_and_tier(
                project_id, tier_name, require_site=False
            )

            if not all_articles:
                click.echo(f" No articles found for {tier_name}")
                continue

            click.echo(f" Found {len(all_articles)} articles")

            articles_without_sites = [a for a in all_articles if not a.site_deployment_id]

            if articles_without_sites and not skip_site_assignment:
                click.echo(f" Assigning sites to {len(articles_without_sites)} articles...")
                try:
                    assign_sites_to_batch(
                        content_records=all_articles,
                        job=job,
                        site_repo=site_repo,
                        bunny_client=None,
                        project_keyword=project.main_keyword
                    )
                    # Drop cached ORM state and re-query so the records
                    # reflect the assignments that were just made.
                    session.expire_all()
                    all_articles = content_repo.get_by_project_and_tier(
                        project_id, tier_name, require_site=False
                    )
                    click.echo(" Assigned sites successfully")
                except Exception as e:
                    # Deliberately best-effort: articles that already have a
                    # site can still be processed below.
                    click.echo(f" Warning: Site assignment failed: {e}")

            # Only articles with a site can go through the remaining steps.
            content_records = [a for a in all_articles if a.site_deployment_id]

            if not content_records:
                click.echo(f" No articles with site assignments, skipping {tier_name}")
                continue

            click.echo(f" Processing {len(content_records)} articles with sites...")

            if not skip_urls:
                click.echo(" Generating URLs...")
                article_urls = generate_urls_for_batch(content_records, site_repo)
                click.echo(f" Generated {len(article_urls)} URLs")
            else:
                # Reuse whatever URLs are already stored on the records.
                article_urls = {a.id: a.url for a in content_records if a.url}

            if not skip_interlinks:
                click.echo(" Finding tiered links...")
                tiered_links = find_tiered_links(
                    content_records,
                    job,
                    project_repo,
                    content_repo,
                    site_repo
                )
                click.echo(f" Found tiered links for tier {tiered_links.get('tier', 'N/A')}")

                click.echo(" Injecting interlinks...")
                inject_interlinks(
                    content_records,
                    article_urls,
                    tiered_links,
                    project,
                    job,
                    content_repo,
                    link_repo
                )
                click.echo(" Interlinks injected successfully")

            if not skip_templates:
                click.echo(" Applying templates...")
                template_count = 0
                for content in content_records:
                    try:
                        if content_generator.apply_template(content.id):
                            template_count += 1
                    except Exception as e:
                        # One failing article must not abort the whole tier.
                        click.echo(f" Warning: Failed to apply template to content {content.id}: {e}")

                click.echo(f" Applied templates to {template_count}/{len(content_records)} articles")

            click.echo(f" {tier_name}: Complete\n")

        click.echo("\n" + "=" * 70)
        click.echo("Post-processing complete!")
        click.echo("=" * 70)
        click.echo("\nYou can now deploy with:")
        click.echo(f" uv run python main.py deploy-batch --batch-id {project_id}")

    finally:
        # Always release the session, including when a ClickException is raised.
        session.close()


if __name__ == "__main__":
    post_process_batch()
|
||||||
|
|
||||||
|
|
@ -0,0 +1,51 @@
|
||||||
|
"""
|
||||||
|
Set the money_site_url for a project
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
uv run python scripts/set_money_site_url.py --project-id 1 --url "https://example.com"
|
||||||
|
uv run python scripts/set_money_site_url.py --project-id 1 --url "https://www.mysite.com"
|
||||||
|
"""
|
||||||
|
|
||||||
|
import click
|
||||||
|
from src.database.session import db_manager
|
||||||
|
from src.database.repositories import ProjectRepository
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
@click.option('--project-id', '-p', required=True, type=int, help='Project ID')
@click.option('--url', '-u', required=True, help='Money site URL (e.g., https://example.com)')
def set_money_site_url(project_id: int, url: str):
    """Set the money_site_url for a project"""
    # NOTE: the docstring above is what click shows for --help, so developer
    # notes are kept in comments.
    #
    # Validation failures raise click.ClickException, which prints
    # "Error: <message>" to stderr and exits with a non-zero status so that
    # shells and calling scripts can detect the failure.
    from urllib.parse import urlparse

    if not url.startswith(('http://', 'https://')):
        raise click.ClickException("URL must start with http:// or https://")

    # Reject scheme-only input such as "https://", which would otherwise be
    # stored as "https:" once trailing slashes are stripped.
    try:
        has_host = bool(urlparse(url).netloc)
    except ValueError:
        # urlparse raises ValueError for malformed bracketed hosts.
        has_host = False
    if not has_host:
        raise click.ClickException("URL must include a host name (e.g., https://example.com)")

    # Store the URL without trailing slashes.
    url = url.rstrip('/')

    session = db_manager.get_session()

    try:
        project_repo = ProjectRepository(session)

        project = project_repo.get_by_id(project_id)
        if not project:
            raise click.ClickException(f"Project {project_id} not found")

        # Capture the previous value before overwriting so it can be reported.
        old_url = project.money_site_url or "(not set)"

        project.money_site_url = url
        project_repo.update(project)

        click.echo(f"Success: Updated project {project_id}: {project.name}")
        click.echo(f" Old URL: {old_url}")
        click.echo(f" New URL: {url}")

    finally:
        # Always release the session, including when a ClickException is raised.
        session.close()


if __name__ == "__main__":
    set_money_site_url()
|
||||||
|
|
||||||
|
|
@ -500,7 +500,7 @@ def list_sites(admin_user: Optional[str], admin_password: Optional[str]):
|
||||||
click.echo("-" * 100)
|
click.echo("-" * 100)
|
||||||
|
|
||||||
for site in sites:
|
for site in sites:
|
||||||
click.echo(f"{site.id:<5} {site.site_name:<25} {site.custom_hostname:<30} "
|
click.echo(f"{site.id:<5} {site.site_name:<25} {site.custom_hostname or 'N/A':<30} "
|
||||||
f"{site.storage_zone_name:<20} {site.storage_zone_region:<8}")
|
f"{site.storage_zone_name:<20} {site.storage_zone_region:<8}")
|
||||||
|
|
||||||
click.echo("-" * 100)
|
click.echo("-" * 100)
|
||||||
|
|
|
||||||
|
|
@ -88,6 +88,13 @@ class BatchProcessor:
|
||||||
if not project:
|
if not project:
|
||||||
raise ValueError(f"Project {job.project_id} not found")
|
raise ValueError(f"Project {job.project_id} not found")
|
||||||
|
|
||||||
|
if not project.money_site_url:
|
||||||
|
raise ValueError(
|
||||||
|
f"Cannot generate articles: money_site_url not set for project {job.project_id}. "
|
||||||
|
f"Please set money_site_url in the project configuration. "
|
||||||
|
f"The money site is required for the tiered linking strategy."
|
||||||
|
)
|
||||||
|
|
||||||
click.echo(f"\nProcessing Job {job_idx}/{self.stats['total_jobs']}: Project ID {job.project_id}")
|
click.echo(f"\nProcessing Job {job_idx}/{self.stats['total_jobs']}: Project ID {job.project_id}")
|
||||||
|
|
||||||
if job.models:
|
if job.models:
|
||||||
|
|
|
||||||
|
|
@ -17,16 +17,16 @@ TIER_DEFAULTS = {
|
||||||
"max_h3_tags": 10
|
"max_h3_tags": 10
|
||||||
},
|
},
|
||||||
"tier2": {
|
"tier2": {
|
||||||
"min_word_count": 1500,
|
"min_word_count": 1100,
|
||||||
"max_word_count": 2000,
|
"max_word_count": 1500,
|
||||||
"min_h2_tags": 2,
|
"min_h2_tags": 2,
|
||||||
"max_h2_tags": 4,
|
"max_h2_tags": 4,
|
||||||
"min_h3_tags": 3,
|
"min_h3_tags": 3,
|
||||||
"max_h3_tags": 8
|
"max_h3_tags": 8
|
||||||
},
|
},
|
||||||
"tier3": {
|
"tier3": {
|
||||||
"min_word_count": 1000,
|
"min_word_count": 850,
|
||||||
"max_word_count": 1500,
|
"max_word_count": 1350,
|
||||||
"min_h2_tags": 2,
|
"min_h2_tags": 2,
|
||||||
"max_h2_tags": 3,
|
"max_h2_tags": 3,
|
||||||
"min_h3_tags": 2,
|
"min_h3_tags": 2,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue