From a8393cdd9657aadb6b637048ec105693d510ff4c Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Wed, 22 Oct 2025 15:34:36 -0500 Subject: [PATCH] Fixed: Now listens to model selection in job.json file --- README.md | 527 ++++++++++++++++++++++++++++++ docs/job-schema.md | 21 +- scripts/backfill_site_pages.py | 4 +- src/cli/commands.py | 11 +- src/generation/ai_client.py | 8 +- src/generation/batch_processor.py | 25 +- src/generation/service.py | 27 +- 7 files changed, 596 insertions(+), 27 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..0385f59 --- /dev/null +++ b/README.md @@ -0,0 +1,527 @@ +# Big Link Man - Content Automation & Syndication Platform + +AI-powered content generation and multi-tier link building system with cloud deployment. + +## Quick Start + +```bash +# Install dependencies +uv pip install -r requirements.txt + +# Setup environment +cp env.example .env +# Edit .env with your credentials + +# Initialize database +uv run python scripts/init_db.py + +# Create first admin user +uv run python scripts/create_first_admin.py + +# Run CLI +uv run python main.py --help +``` + +## Environment Configuration + +Required environment variables in `.env`: + +```bash +DATABASE_URL=sqlite:///./content_automation.db +OPENROUTER_API_KEY=your_key_here +BUNNY_ACCOUNT_API_KEY=your_bunny_key_here +``` + +See `env.example` for full configuration options. 
+ +## Database Management + +### Initialize Database +```bash +uv run python scripts/init_db.py +``` + +### Reset Database (drops all data) +```bash +uv run python scripts/init_db.py reset +``` + +### Create First Admin +```bash +uv run python scripts/create_first_admin.py +``` + +### Database Migrations +```bash +# Story 3.1 - Site deployments +uv run python scripts/migrate_story_3.1_sqlite.py + +# Story 3.2 - Anchor text +uv run python scripts/migrate_add_anchor_text.py + +# Story 3.3 - Template fields +uv run python scripts/migrate_add_template_fields.py + +# Story 3.4 - Site pages +uv run python scripts/migrate_add_site_pages.py + +# Story 4.1 - Deployment fields +uv run python scripts/migrate_add_deployment_fields.py + +# Backfill site pages after migration +uv run python scripts/backfill_site_pages.py +``` + +## User Management + +### Add User +```bash +uv run python main.py add-user \ + --username newuser \ + --password password123 \ + --role Admin \ + --admin-user admin \ + --admin-password adminpass +``` + +### List Users +```bash +uv run python main.py list-users \ + --admin-user admin \ + --admin-password adminpass +``` + +### Delete User +```bash +uv run python main.py delete-user \ + --username olduser \ + --admin-user admin \ + --admin-password adminpass +``` + +## Site Management + +### Provision New Site +```bash +uv run python main.py provision-site \ + --name "My Site" \ + --domain www.example.com \ + --storage-name my-storage-zone \ + --region DE \ + --admin-user admin \ + --admin-password adminpass +``` + +Regions: `DE`, `NY`, `LA`, `SG`, `SYD` + +### Attach Domain to Existing Storage +```bash +uv run python main.py attach-domain \ + --name "Another Site" \ + --domain www.another.com \ + --storage-name my-storage-zone \ + --admin-user admin \ + --admin-password adminpass +``` + +### Sync Existing Bunny.net Sites +```bash +# Dry run +uv run python main.py sync-sites \ + --admin-user admin \ + --dry-run + +# Actually import +uv run python main.py 
sync-sites \ + --admin-user admin +``` + +### List Sites +```bash +uv run python main.py list-sites \ + --admin-user admin \ + --admin-password adminpass +``` + +### Get Site Details +```bash +uv run python main.py get-site \ + --domain www.example.com \ + --admin-user admin \ + --admin-password adminpass +``` + +### Remove Site +```bash +uv run python main.py remove-site \ + --domain www.example.com \ + --admin-user admin \ + --admin-password adminpass +``` + +## Project Management + +### Ingest CORA Report +```bash +uv run python main.py ingest-cora \ + --file shaft_machining.xlsx \ + --name "Shaft Machining Project" \ + --custom-anchors "shaft repair,engine parts" \ + --username admin \ + --password adminpass +``` + +### List Projects +```bash +uv run python main.py list-projects \ + --username admin \ + --password adminpass +``` + +## Content Generation + +### Create Job Configuration +```bash +# Tier 1 only +uv run python create_job_config.py 1 tier1 15 + +# Multi-tier +uv run python create_job_config.py 1 multi 15 50 100 +``` + +### Generate Content Batch +```bash +uv run python main.py generate-batch \ + --job-file jobs/project_1_tier1_15articles.json \ + --username admin \ + --password adminpass +``` + +With options: +```bash +uv run python main.py generate-batch \ + --job-file jobs/my_job.json \ + --username admin \ + --password adminpass \ + --debug \ + --continue-on-error \ + --model gpt-4o-mini +``` + +Available models: `gpt-4o-mini`, `claude-sonnet-4.5` + +**Note:** If your job file contains a `models` config, it will override the `--model` flag and use different models for title, outline, and content generation stages. 
+ +## Deployment + +### Deploy Batch +```bash +# Automatic deployment (runs after generation) +uv run python main.py generate-batch \ + --job-file jobs/my_job.json \ + --username admin \ + --password adminpass + +# Manual deployment +uv run python main.py deploy-batch \ + --batch-id 123 \ + --admin-user admin \ + --admin-password adminpass +``` + +### Dry Run Deployment +```bash +uv run python main.py deploy-batch \ + --batch-id 123 \ + --dry-run +``` + +### Verify Deployment +```bash +# Check all URLs +uv run python main.py verify-deployment --batch-id 123 + +# Check random sample +uv run python main.py verify-deployment \ + --batch-id 123 \ + --sample 10 \ + --timeout 10 +``` + +## Link Export + +### Export Article URLs +```bash +# Tier 1 only +uv run python main.py get-links \ + --project-id 123 \ + --tier 1 + +# Tier 2 and above +uv run python main.py get-links \ + --project-id 123 \ + --tier 2+ + +# With anchor text and destinations +uv run python main.py get-links \ + --project-id 123 \ + --tier 2+ \ + --with-anchor-text \ + --with-destination-url +``` + +Output is CSV format to stdout. 
Redirect to save: +```bash +uv run python main.py get-links \ + --project-id 123 \ + --tier 1 > tier1_urls.csv +``` + +## Utility Scripts + +### Check Last Generated Content +```bash +uv run python check_last_gen.py +``` + +### List All Users (Direct DB Access) +```bash +uv run python scripts/list_users.py +``` + +### Add Admin (Direct DB Access) +```bash +uv run python scripts/add_admin_direct.py +``` + +### Check Migration Status +```bash +uv run python scripts/check_migration.py +``` + +### Add Tier to Projects +```bash +uv run python scripts/add_tier_to_projects.py +``` + +## Testing + +### Run All Tests +```bash +uv run pytest +``` + +### Run Unit Tests +```bash +uv run pytest tests/unit/ -v +``` + +### Run Integration Tests +```bash +uv run pytest tests/integration/ -v +``` + +### Run Specific Test File +```bash +uv run pytest tests/unit/test_url_generator.py -v +``` + +### Run Story 3.1 Tests +```bash +uv run pytest tests/unit/test_url_generator.py \ + tests/unit/test_site_provisioning.py \ + tests/unit/test_site_assignment.py \ + tests/unit/test_job_config_extensions.py \ + tests/integration/test_story_3_1_integration.py \ + -v +``` + +### Run with Coverage +```bash +uv run pytest --cov=src --cov-report=html +``` + +## System Information + +### Show Configuration +```bash +uv run python main.py config +``` + +### Health Check +```bash +uv run python main.py health +``` + +### List Available Models +```bash +uv run python main.py models +``` + +## Directory Structure + +``` +Big-Link-Man/ +├── main.py # CLI entry point +├── src/ # Source code +│ ├── api/ # FastAPI endpoints +│ ├── auth/ # Authentication +│ ├── cli/ # CLI commands +│ ├── core/ # Configuration +│ ├── database/ # Models, repositories +│ ├── deployment/ # Cloud deployment +│ ├── generation/ # Content generation +│ ├── ingestion/ # CORA parsing +│ ├── interlinking/ # Link injection +│ └── templating/ # HTML templates +├── scripts/ # Database & utility scripts +├── tests/ # Test suite +│ ├── unit/ 
+│ └── integration/ +├── jobs/ # Job configuration files +├── docs/ # Documentation +└── deployment_logs/ # Deployed URL logs +``` + +## Job Configuration Format + +Example job config (`jobs/example.json`): + +```json +{ + "job_name": "Multi-Tier Launch", + "project_id": 1, + "description": "Site build with 165 articles", + "models": { + "title": "openai/gpt-4o-mini", + "outline": "anthropic/claude-3.5-sonnet", + "content": "anthropic/claude-3.5-sonnet" + }, + "tiers": [ + { + "tier": 1, + "article_count": 15, + "validation_attempts": 3 + }, + { + "tier": 2, + "article_count": 50, + "validation_attempts": 2 + } + ], + "failure_config": { + "max_consecutive_failures": 10, + "skip_on_failure": true + }, + "interlinking": { + "links_per_article_min": 2, + "links_per_article_max": 4, + "include_home_link": true + }, + "deployment_targets": ["www.primary.com"], + "tier1_preferred_sites": ["www.premium.com"], + "auto_create_sites": true +} +``` + +### Per-Stage Model Configuration + +You can specify different AI models for each generation stage (title, outline, content): + +```json +{ + "models": { + "title": "openai/gpt-4o-mini", + "outline": "anthropic/claude-3.5-sonnet", + "content": "openai/gpt-4o" + } +} +``` + +**Available models:** +- `openai/gpt-4o-mini` - Fast and cost-effective +- `openai/gpt-4o` - Higher quality, more expensive +- `anthropic/claude-3.5-sonnet` - Excellent for long-form content + +If `models` is not specified in the job file, all stages use the model from the `--model` CLI flag (default: `gpt-4o-mini`). + +## Common Workflows + +### Initial Setup +```bash +uv pip install -r requirements.txt +cp env.example .env +# Edit .env +uv run python scripts/init_db.py +uv run python scripts/create_first_admin.py +uv run python main.py sync-sites --admin-user admin +``` + +### New Project Workflow +```bash +# 1. 
Ingest CORA report +uv run python main.py ingest-cora \ + --file project.xlsx \ + --name "My Project" \ + --username admin \ + --password adminpass + +# 2. Create job config +uv run python create_job_config.py 1 multi 15 50 100 + +# 3. Generate content (auto-deploys) +uv run python main.py generate-batch \ + --job-file jobs/project_1_multi_3tiers_165articles.json \ + --username admin \ + --password adminpass + +# 4. Verify deployment +uv run python main.py verify-deployment --batch-id 1 + +# 5. Export URLs for link building +uv run python main.py get-links \ + --project-id 1 \ + --tier 1 > tier1_urls.csv +``` + +### Re-deploy After Changes +```bash +uv run python main.py deploy-batch \ + --batch-id 123 \ + --admin-user admin \ + --admin-password adminpass +``` + +## Troubleshooting + +### Database locked +```bash +# Stop any running processes, then reset (WARNING: this drops all data): +uv run python scripts/init_db.py reset +``` + +### Missing dependencies +```bash +uv pip install -r requirements.txt --force-reinstall +``` + +### AI API errors +Check `OPENROUTER_API_KEY` in `.env` + +### Bunny.net authentication failed +Check `BUNNY_ACCOUNT_API_KEY` in `.env` + +### Storage upload failed +Verify `storage_zone_password` in database (set during site provisioning) + +## Documentation + +- Product Requirements: `docs/prd.md` +- Architecture: `docs/architecture/` +- Implementation Summaries: `STORY_*.md` files +- Quick Start Guides: `*_QUICKSTART.md` files + +## License + +All rights reserved. 
+ diff --git a/docs/job-schema.md b/docs/job-schema.md index 0ec2959..9a1635a 100644 --- a/docs/job-schema.md +++ b/docs/job-schema.md @@ -35,7 +35,7 @@ Each job object defines a complete content generation batch for a specific proje | Field | Type | Default | Description | |-------|------|---------|-------------| -| `models` | `Object` | Uses CLI default | AI models to use for each generation stage (Story 2.3 - planned) | +| `models` | `Object` | Uses CLI default | AI models to use for each generation stage (title, outline, content) | | `deployment_targets` | `Array` | `null` | Array of site custom_hostnames for tier1 deployment assignment (Story 2.5) | | `tier1_preferred_sites` | `Array` | `null` | Array of hostnames for tier1 site assignment priority (Story 3.1) | | `auto_create_sites` | `boolean` | `false` | Whether to auto-create sites when pool is insufficient (Story 3.1) | @@ -174,13 +174,13 @@ Each tier in the `tiers` object defines content generation parameters for that s } ``` -## AI Model Configuration (Story 2.3 - Not Yet Implemented) +## AI Model Configuration ### `models` - **Type**: `Object` (optional) - **Purpose**: Specifies AI models to use for each generation stage - **Behavior**: Allows different models for title, outline, and content generation -- **Note**: Currently not parsed by job config - uses CLI `--model` flag instead +- **Note**: If not specified, all stages use the model from CLI `--model` flag (default: `gpt-4o-mini`) #### Models Object Fields | Field | Type | Description | @@ -210,7 +210,7 @@ Each tier in the `tiers` object defines content generation parameters for that s ``` ### Implementation Status -This field is defined in the JSON schema but **not yet implemented** in the job config parser (`src/generation/job_config.py`). Currently, all stages use the same model specified via CLI `--model` flag. +**Implemented** - The `models` field is fully functional. 
Different models can be specified for title, outline, and content generation stages. If a job file contains a `models` configuration and you also use the `--model` CLI flag, the system will warn you that the CLI flag is being ignored in favor of the job config. ## Tiered Link Configuration (Story 3.2) @@ -299,7 +299,7 @@ This field is defined in the JSON schema but **not yet implemented** in the job ### Job Level Validation - `project_id` must be a positive integer - `tiers` must be an object with at least one tier -- `models` must be an object with `title`, `outline`, and `content` fields (if specified) - **NOT YET VALIDATED** +- `models` must be an object with `title`, `outline`, and `content` fields (if specified) - `deployment_targets` must be an array of strings (if specified) - `tier1_preferred_sites` must be an array of strings (if specified) - `auto_create_sites` must be a boolean (if specified) @@ -331,7 +331,7 @@ uv run python main.py generate-batch --job-file jobs/example.json --username adm - `--password, -p`: Password for authentication - `--debug`: Save AI responses to debug_output/ - `--continue-on-error`: Continue processing if article generation fails -- `--model, -m`: AI model to use (default: gpt-4o-mini) +- `--model, -m`: AI model to use (default: gpt-4o-mini). Overridden by job file `models` config if present. 
## Implementation History @@ -340,10 +340,11 @@ uv run python main.py generate-batch --job-file jobs/example.json --username adm - Added tier configuration with word count and heading constraints - Added tier defaults for common configurations -### Story 2.3: AI Content Generation (Partial) -- **Implemented**: Database fields for tracking models (title_model, outline_model, content_model) -- **Not Implemented**: Job config `models` field - currently uses CLI `--model` flag -- **Planned**: Per-stage model selection from job configuration +### Story 2.3: AI Content Generation +- **Implemented**: Per-stage model selection via job config `models` field +- **Implemented**: Dynamic model switching in AIClient with `override_model` parameter +- **Implemented**: CLI warning when job contains models but `--model` flag is used +- **Behavior**: Job file `models` config takes precedence over CLI `--model` flag ### Story 2.5: Deployment Target Assignment - Added `deployment_targets` field for tier1 site assignment diff --git a/scripts/backfill_site_pages.py b/scripts/backfill_site_pages.py index abab72c..2e958ef 100644 --- a/scripts/backfill_site_pages.py +++ b/scripts/backfill_site_pages.py @@ -14,7 +14,7 @@ from src.database.session import db_manager from src.database.repositories import SiteDeploymentRepository, SitePageRepository, UserRepository from src.templating.service import TemplateService from src.generation.site_page_generator import generate_site_pages -from src.auth.password import verify_password +from src.auth.service import AuthService logging.basicConfig( level=logging.INFO, @@ -44,7 +44,7 @@ def backfill_site_pages( user_repo = UserRepository(session) user = user_repo.get_by_username(username) - if not user or not verify_password(password, user.hashed_password): + if not user or not AuthService.verify_password(password, user.hashed_password): logger.error("Authentication failed") session.close() sys.exit(1) diff --git a/src/cli/commands.py 
b/src/cli/commands.py index d560389..a67b793 100644 --- a/src/cli/commands.py +++ b/src/cli/commands.py @@ -932,7 +932,16 @@ def generate_batch( click.echo("Please set OPENROUTER_API_KEY in your .env file", err=True) raise click.Abort() - click.echo(f"Initializing AI client with model: {model}") + from src.generation.job_config import JobConfig + job_config = JobConfig(job_file) + jobs = job_config.get_jobs() + + has_models_in_job = any(job.models is not None for job in jobs) + if has_models_in_job and model != 'gpt-4o-mini': + click.echo(f"Warning: Job file contains per-stage model configuration.") + click.echo(f" The --model flag will be ignored in favor of job config.\n") + + click.echo(f"Initializing AI client with default model: {model}") ai_client = AIClient(api_key=api_key, model=model) prompt_manager = PromptManager() diff --git a/src/generation/ai_client.py b/src/generation/ai_client.py index 6d77e7c..76d8b2d 100644 --- a/src/generation/ai_client.py +++ b/src/generation/ai_client.py @@ -37,7 +37,8 @@ class AIClient: system_message: Optional[str] = None, max_tokens: int = 4000, temperature: float = 0.7, - json_mode: bool = False + json_mode: bool = False, + override_model: Optional[str] = None ) -> str: """ Generate completion from OpenRouter API @@ -48,6 +49,7 @@ class AIClient: max_tokens: Maximum tokens to generate temperature: Sampling temperature (0-1) json_mode: If True, requests JSON response format + override_model: If provided, use this model instead of self.model Returns: Generated text completion @@ -57,8 +59,10 @@ class AIClient: messages.append({"role": "system", "content": system_message}) messages.append({"role": "user", "content": prompt}) + model_to_use = override_model if override_model else self.model + kwargs: Dict[str, Any] = { - "model": self.model, + "model": model_to_use, "messages": messages, "max_tokens": max_tokens, "temperature": temperature diff --git a/src/generation/batch_processor.py b/src/generation/batch_processor.py index 
b93e452..761e4d6 100644 --- a/src/generation/batch_processor.py +++ b/src/generation/batch_processor.py @@ -82,12 +82,20 @@ class BatchProcessor: auto_deploy: bool = True ): """Process a single job""" + self.current_job = job + project = self.project_repo.get_by_id(job.project_id) if not project: raise ValueError(f"Project {job.project_id} not found") click.echo(f"\nProcessing Job {job_idx}/{self.stats['total_jobs']}: Project ID {job.project_id}") + if job.models: + click.echo(f" Using per-stage models:") + click.echo(f" Title: {job.models.title}") + click.echo(f" Outline: {job.models.outline}") + click.echo(f" Content: {job.models.content}") + resolved_targets = {} if job.deployment_targets: if not self.site_deployment_repo: @@ -205,6 +213,8 @@ class BatchProcessor: """Generate a single article""" prefix = f" [{article_num}/{tier_config.count}]" + models = self.current_job.models if hasattr(self, 'current_job') and self.current_job.models else None + site_deployment_id = assign_site_for_article(article_index, resolved_targets) if site_deployment_id: @@ -214,7 +224,11 @@ class BatchProcessor: click.echo(f"{prefix} No site assignment (index {article_index} >= {len(resolved_targets)} targets)") click.echo(f"{prefix} Generating title...") - title = self.generator.generate_title(project_id, debug=debug) + title = self.generator.generate_title( + project_id, + debug=debug, + model=models.title if models else None + ) click.echo(f"{prefix} Generated title: \"{title}\"") click.echo(f"{prefix} Generating outline...") @@ -225,7 +239,8 @@ class BatchProcessor: max_h2=tier_config.max_h2_tags, min_h3=tier_config.min_h3_tags, max_h3=tier_config.max_h3_tags, - debug=debug + debug=debug, + model=models.outline if models else None ) h2_count = len(outline["outline"]) @@ -239,7 +254,8 @@ class BatchProcessor: outline=outline, min_word_count=tier_config.min_word_count, max_word_count=tier_config.max_word_count, - debug=debug + debug=debug, + model=models.content if models else None 
) word_count = self.generator.count_words(content) @@ -253,7 +269,8 @@ class BatchProcessor: content=content, target_word_count=tier_config.min_word_count, debug=debug, - project_id=project_id + project_id=project_id, + model=models.content if models else None ) word_count = self.generator.count_words(content) click.echo(f"{prefix} Augmented content: {word_count:,} words") diff --git a/src/generation/service.py b/src/generation/service.py index dd572bf..fb910f0 100644 --- a/src/generation/service.py +++ b/src/generation/service.py @@ -32,13 +32,14 @@ class ContentGenerator: self.template_service = template_service or TemplateService(content_repo) self.site_deployment_repo = site_deployment_repo - def generate_title(self, project_id: int, debug: bool = False) -> str: + def generate_title(self, project_id: int, debug: bool = False, model: Optional[str] = None) -> str: """ Generate SEO-optimized title Args: project_id: Project ID to generate title for debug: If True, save response to debug_output/ + model: Optional model override for this generation stage Returns: Generated title string @@ -61,7 +62,8 @@ class ContentGenerator: prompt=user_prompt, system_message=system_msg, max_tokens=100, - temperature=0.7 + temperature=0.7, + override_model=model ) title = title.strip().strip('"').strip("'") @@ -81,7 +83,8 @@ class ContentGenerator: max_h2: int, min_h3: int, max_h3: int, - debug: bool = False + debug: bool = False, + model: Optional[str] = None ) -> dict: """ Generate article outline in JSON format @@ -94,6 +97,7 @@ class ContentGenerator: min_h3: Minimum H3 subheadings total max_h3: Maximum H3 subheadings total debug: If True, save response to debug_output/ + model: Optional model override for this generation stage Returns: Outline dictionary: {"outline": [{"h2": "...", "h3": ["...", "..."]}]} @@ -125,7 +129,8 @@ class ContentGenerator: system_message=system_msg, max_tokens=2000, temperature=0.7, - json_mode=True + json_mode=True, + override_model=model ) 
print(f"[DEBUG] Raw outline response: {outline_json}") # Save raw response immediately @@ -168,7 +173,8 @@ class ContentGenerator: outline: dict, min_word_count: int, max_word_count: int, - debug: bool = False + debug: bool = False, + model: Optional[str] = None ) -> str: """ Generate full article HTML fragment @@ -180,6 +186,7 @@ class ContentGenerator: min_word_count: Minimum word count for guidance max_word_count: Maximum word count for guidance debug: If True, save response to debug_output/ + model: Optional model override for this generation stage Returns: HTML string with

<h2>, <h3>, <p>

tags @@ -207,7 +214,8 @@ class ContentGenerator: prompt=user_prompt, system_message=system_msg, max_tokens=8000, - temperature=0.7 + temperature=0.7, + override_model=model ) content = content.strip() @@ -255,7 +263,8 @@ class ContentGenerator: content: str, target_word_count: int, debug: bool = False, - project_id: Optional[int] = None + project_id: Optional[int] = None, + model: Optional[str] = None ) -> str: """ Expand article content to meet minimum word count @@ -265,6 +274,7 @@ class ContentGenerator: target_word_count: Target word count debug: If True, save response to debug_output/ project_id: Optional project ID for debug output + model: Optional model override for this generation stage Returns: Expanded HTML content @@ -279,7 +289,8 @@ class ContentGenerator: prompt=user_prompt, system_message=system_msg, max_tokens=8000, - temperature=0.7 + temperature=0.7, + override_model=model ) augmented = augmented.strip()