From 6e2977c5002f3938cc3b7a4321565a19f8688ae7 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Fri, 16 Jan 2026 15:53:07 -0600 Subject: [PATCH] Add tier1 branded anchor text ratio flag to ingest-cora command - Add --tier1-branded-ratio flag (default: 0.75) to ingest-cora command - Prompt for branded anchor text when ratio is specified - Generate explicit anchor_text_config in tier1 job with specified ratio - Update documentation in CLI_COMMAND_REFERENCE.md, job-schema.md, and gui-planning.md --- docs/CLI_COMMAND_REFERENCE.md | 11 + docs/gui-planning.md | 1301 +++++++++++++++++++++++++++++++++ docs/job-schema.md | 149 +++- src/cli/commands.py | 209 +++++- 4 files changed, 1649 insertions(+), 21 deletions(-) create mode 100644 docs/gui-planning.md diff --git a/docs/CLI_COMMAND_REFERENCE.md b/docs/CLI_COMMAND_REFERENCE.md index 62e075a..64f6754 100644 --- a/docs/CLI_COMMAND_REFERENCE.md +++ b/docs/CLI_COMMAND_REFERENCE.md @@ -350,6 +350,9 @@ Ingest a CORA .xlsx report and create a new project - `--custom-anchors`, `-a` - Type: STRING | Comma-separated list of custom anchor text (optional) +- `--tier1-branded-ratio` + - Type: FLOAT | Ratio of branded anchor text for tier1 (default: 0.75). When specified, prompts for branded anchor text (company name) and configures tier1 job with explicit anchor text terms achieving the specified ratio. + - `--username`, `-u` - Type: STRING | Username for authentication @@ -362,6 +365,14 @@ Ingest a CORA .xlsx report and create a new project uv run python main.py ingest-cora --file path/to/file.xlsx --name "My Project" ``` +**Example with branded anchor text ratio:** + +```bash +uv run python main.py ingest-cora --file path/to/file.xlsx --name "My Project" --tier1-branded-ratio 0.75 +``` + +When using `--tier1-branded-ratio`, you will be prompted to enter the branded anchor text (company name). 
The generated job file will include tier1 anchor_text_config with explicit mode, where the specified percentage of terms are branded and the remainder are main keyword variations. + --- ### `ingest-simple` diff --git a/docs/gui-planning.md b/docs/gui-planning.md new file mode 100644 index 0000000..ccb35ed --- /dev/null +++ b/docs/gui-planning.md @@ -0,0 +1,1301 @@ +# GUI Planning Document + +Comprehensive mapping of all CLI commands, scripts, and their parameters to guide front-end GUI development. + +## Main Workflows + +### 1. Project Creation Workflow +- Ingest CORA file → Create project → Create job file → Generate content → Deploy + +### 2. Site Management Workflow +- Discover/Provision sites → Sync sites → Manage domains → Deploy content + +### 3. Content Generation Workflow +- Create/Edit job file → Generate batch → Verify → Deploy → Export links + +### 4. Maintenance Workflow +- Update index pages → Add robots.txt → Verify deployments → Check status + +--- + +## CLI Commands Reference + +### System Commands + +#### `config` +Show current configuration + +**Parameters:** None + +**Example:** +```bash +uv run python main.py config +``` + +--- + +#### `health` +Check system health + +**Parameters:** None + +**Example:** +```bash +uv run python main.py health +``` + +--- + +#### `models` +List available AI models + +**Parameters:** None + +**Example:** +```bash +uv run python main.py models +``` + +--- + +### User Management + +#### `add-user` +Create a new user (requires admin authentication) + +**Parameters:** +- `--username` (required) - Username for the new user +- `--password` (required) - Password for the new user +- `--role` (required) - Role: "Admin" or "User" +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication + +**Example:** +```bash +uv run python main.py add-user --username newuser --password pass123 --role Admin +``` + +--- + +#### `delete-user` +Delete a user by 
username (requires admin authentication) + +**Parameters:** +- `--username` (required) - Username to delete +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication +- `--yes` (flag) - Skip confirmation prompt + +**Example:** +```bash +uv run python main.py delete-user --username olduser --yes +``` + +--- + +#### `list-users` +List all users (requires admin authentication) + +**Parameters:** +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication + +**Example:** +```bash +uv run python main.py list-users +``` + +--- + +### Site Management + +#### `provision-site` +Provision a new site with Storage Zone and Pull Zone (requires admin) + +**Parameters:** +- `--name` (required) - Site name +- `--domain` (required) - Custom domain (FQDN, e.g., www.example.com) +- `--storage-name` (required) - Storage Zone name (must be globally unique) +- `--region` (required) - Storage region: "DE", "NY", "LA", "SG", "SYD" +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication + +**Example:** +```bash +uv run python main.py provision-site --name "My Site" --domain www.example.com --storage-name mysite-storage --region NY +``` + +--- + +#### `attach-domain` +Attach a domain to an existing Storage Zone (requires admin) + +**Parameters:** +- `--name` (required) - Site name +- `--domain` (required) - Custom domain (FQDN, e.g., www.example.com) +- `--storage-name` (required) - Existing Storage Zone name +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication + +**Example:** +```bash +uv run python main.py attach-domain --name "New Site" --domain new.example.com --storage-name existing-storage +``` + +--- + +#### `list-sites` +List all site deployments (requires admin) + 
+**Parameters:** +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication + +**Example:** +```bash +uv run python main.py list-sites +``` + +--- + +#### `get-site` +Get detailed information about a site deployment (requires admin) + +**Parameters:** +- `--domain` (required) - Custom domain to lookup +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication + +**Example:** +```bash +uv run python main.py get-site --domain www.example.com +``` + +--- + +#### `remove-site` +Remove a site deployment record (requires admin) + +**Parameters:** +- `--domain` (required) - Custom domain to remove +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication +- `--yes` (flag) - Skip confirmation prompt + +**Example:** +```bash +uv run python main.py remove-site --domain www.example.com --yes +``` + +--- + +#### `sync-sites` +Sync existing bunny.net sites with custom domains to database (requires admin) + +**Parameters:** +- `--admin-user` (optional) - Admin username for authentication +- `--admin-password` (optional) - Admin password for authentication +- `--dry-run` (flag) - Show what would be imported without making changes + +**Example:** +```bash +uv run python main.py sync-sites --dry-run +``` + +--- + +#### `discover-s3-buckets` +Discover and register AWS S3 buckets as site deployments + +**Parameters:** None (runs the discovery script) + +**Example:** +```bash +uv run python main.py discover-s3-buckets +``` + +--- + +### Project Management + +#### `ingest-cora` +Ingest a CORA .xlsx report and create a new project + +**Parameters:** +- `--file` / `-f` (required) - Path to CORA .xlsx file +- `--name` / `-n` (required) - Project name +- `--money-site-url` / `-m` (optional) - Money site URL (e.g., https://example.com) +- `--custom-anchors` / `-a` 
(optional) - Comma-separated list of custom anchor text +- `--tier1-branded-ratio` (optional) - Ratio of branded anchor text for tier1 (default: 0.75). When specified, prompts for branded anchor text and configures tier1 job with explicit anchor text terms achieving the specified ratio. +- `--username` / `-u` (optional) - Username for authentication +- `--password` / `-p` (optional) - Password for authentication + +**Example:** +```bash +uv run python main.py ingest-cora --file report.xlsx --name "My Project" --money-site-url https://example.com +``` + +--- + +#### `ingest-simple` +Ingest a simple spreadsheet and create a new project + +**Expected spreadsheet format:** +- First row: Headers (main_keyword, project_name, related_searches, entities) +- Second row: Data values + +**Required columns:** main_keyword, project_name, related_searches, entities + +**Optional columns:** word_count (default: 1500), term_frequency (default: 3) + +**Parameters:** +- `--file` / `-f` (required) - Path to simple .xlsx spreadsheet file +- `--name` / `-n` (optional) - Project name (overrides project_name from spreadsheet) +- `--money-site-url` / `-m` (optional) - Money site URL (e.g., https://example.com) +- `--username` / `-u` (optional) - Username for authentication +- `--password` / `-p` (optional) - Password for authentication + +**Example:** +```bash +uv run python main.py ingest-simple --file simple.xlsx +``` + +--- + +#### `list-projects` +List all projects for the authenticated user + +**Parameters:** +- `--username` / `-u` (optional) - Username for authentication +- `--password` / `-p` (optional) - Password for authentication + +**Example:** +```bash +uv run python main.py list-projects +``` + +--- + +#### `create-job` +Create a job file from an existing project ID + +**Parameters:** +- `--project-id` / `-p` (required) - Project ID to create job file for +- `--deployment-targets` / `-d` (multiple, optional) - Deployment target hostnames (can specify multiple times) +- 
`--tier1-count` (optional, default: 10) - Number of tier1 articles +- `--tier2-count` (optional, default: 30) - Number of tier2 articles +- `--output` / `-o` (optional) - Output file path (default: jobs/{project_name}.json) +- `--username` / `-u` (optional) - Username for authentication +- `--password` / `-pwd` (optional) - Password for authentication + +**Example:** +```bash +uv run python main.py create-job --project-id 1 --tier1-count 15 --tier2-count 50 --deployment-targets www.site1.com --deployment-targets www.site2.com +``` + +--- + +### Content Generation + +#### `generate-batch` +Generate content batch from job file + +**Parameters:** +- `--job-file` / `-j` (required) - Path to job JSON file +- `--username` / `-u` (optional) - Username for authentication +- `--password` / `-p` (optional) - Password for authentication +- `--debug` (flag) - Save AI responses to debug_output/ +- `--continue-on-error` (flag) - Continue processing if article generation fails +- `--model` / `-m` (optional, default: "gpt-4o-mini") - AI model to use (gpt-4o-mini, x-ai/grok-4-fast) + +**Example:** +```bash +uv run python main.py generate-batch --job-file jobs/my-project.json --debug --continue-on-error +``` + +--- + +### Deployment + +#### `deploy-batch` +Deploy all content in a batch to cloud storage + +**Parameters:** +- `--batch-id` / `-b` (required) - Project/batch ID to deploy +- `--username` / `-u` (optional) - Username for authentication +- `--password` / `-p` (optional) - Password for authentication +- `--continue-on-error` (flag, default: True) - Continue if file fails +- `--dry-run` (flag) - Preview what would be deployed + +**Example:** +```bash +uv run python main.py deploy-batch --batch-id 1 --dry-run +``` + +--- + +#### `verify-deployment` +Verify deployed URLs return 200 OK status + +**Parameters:** +- `--batch-id` / `-b` (required) - Project/batch ID to verify +- `--sample` / `-s` (optional) - Number of random URLs to check (default: check all) +- `--timeout` / `-t` 
(optional, default: 10) - Request timeout in seconds + +**Example:** +```bash +uv run python main.py verify-deployment --batch-id 1 --sample 10 --timeout 15 +``` + +--- + +### Link Export + +#### `get-links` +Export article URLs with optional link details for a project and tier + +**Parameters:** +- `--project-id` / `-p` (required) - Project ID to get links for +- `--tier` / `-t` (required) - Tier to filter (e.g., "1" or "2+" for tier 2 and above) +- `--with-anchor-text` (flag) - Include anchor text used for tiered links +- `--with-destination-url` (flag) - Include destination URL that the article links to + +**Example:** +```bash +uv run python main.py get-links --project-id 1 --tier 1 --with-anchor-text --with-destination-url +``` + +--- + +## Scripts Reference + +### Site Management Scripts + +#### `discover_s3_buckets.py` +Discover and register AWS S3 buckets as site deployments + +**Parameters:** +- `--auto-import-all` (flag) - Automatically import all unregistered buckets + +**Usage:** +```bash +# Interactive mode (select buckets manually) +uv run python scripts/discover_s3_buckets.py + +# Auto-import mode (import all unregistered buckets) +uv run python scripts/discover_s3_buckets.py --auto-import-all +``` + +**Features:** +- Discovers all S3 buckets in AWS account +- Skips buckets already registered in database +- Respects exclusion list in `s3_bucket_exclusions.txt` +- Automatically detects bucket region + +--- + +#### `list_s3_fqdn_sites.py` +List all S3 sites with custom domains (FQDNs) + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/list_s3_fqdn_sites.py +``` + +--- + +#### `list_sites_by_id.py` +List site deployments by specific IDs + +**Parameters:** None (edit script to set target_ids) + +**Usage:** +```bash +# Edit the script to set target_ids, then: +uv run python scripts/list_sites_by_id.py +``` + +--- + +#### `delete_sites.py` +Delete site deployments by ID + +**Parameters:** None (edit script to set site_ids) + +**Usage:** 
+```bash +# Edit the script to set site_ids, then: +uv run python scripts/delete_sites.py +``` + +**Warning:** This permanently deletes site records from the database. + +--- + +#### `check_sites.py` +Check site status + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/check_sites.py +``` + +--- + +#### `check_specific_sites.py` +Check specific sites + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/check_specific_sites.py +``` + +--- + +### Deployment Scripts + +#### `add_robots_txt_to_buckets.py` +Add standardized `robots.txt` files to all storage buckets (both S3 and Bunny) + +**Parameters:** +- `--dry-run` (flag) - Preview what would be done without actually uploading +- `--provider` (optional) - Only process specific provider: "s3" or "bunny" + +**Usage:** +```bash +# Preview what would be done (recommended first) +uv run python scripts/add_robots_txt_to_buckets.py --dry-run + +# Upload to all buckets +uv run python scripts/add_robots_txt_to_buckets.py + +# Only process S3 buckets +uv run python scripts/add_robots_txt_to_buckets.py --provider s3 + +# Only process Bunny storage zones +uv run python scripts/add_robots_txt_to_buckets.py --provider bunny +``` + +**robots.txt behavior:** +- ✅ Allows: Google, Bing, Yahoo, DuckDuckGo, Baidu, Yandex +- ✅ Allows: GPTBot, Claude, Common Crawl, Perplexity, ByteDance AI +- ❌ Blocks: Ahrefs, Semrush, Moz, and other SEO tools +- ❌ Blocks: HTTrack, Wget, and other scrapers/bad bots + +--- + +#### `update_index_pages.py` +Automatically generate or update `index.html` and `sitemap.xml` files for all storage buckets + +**Parameters:** +- `--dry-run` (flag) - Preview what would be updated without actually uploading +- `--provider` (optional) - Only process specific provider: "s3" or "bunny" +- `--force` (flag) - Force update even if no changes detected +- `--limit` (optional, integer) - Limit number of sites to process (useful for testing) +- `--hostname` (optional, string) - Process only 
specific hostname (exact match) + +**Usage:** +```bash +# Preview what would be updated (recommended first) +uv run python scripts/update_index_pages.py --dry-run + +# Update all buckets +uv run python scripts/update_index_pages.py + +# Only process S3 buckets +uv run python scripts/update_index_pages.py --provider s3 + +# Only process Bunny storage zones +uv run python scripts/update_index_pages.py --provider bunny + +# Force update even if no changes detected +uv run python scripts/update_index_pages.py --force + +# Test on specific site +uv run python scripts/update_index_pages.py --hostname example.com + +# Limit number of sites (useful for testing) +uv run python scripts/update_index_pages.py --limit 10 +``` + +**Features:** +- Lists all HTML files in each bucket's root directory +- Extracts titles from `<title>` tags (or formats filenames as fallback) +- Generates article listings sorted by most recent modification date +- Creates or updates `index.html` with article links +- Generates `sitemap.xml` with industry-standard settings +- Tracks last run timestamps to avoid unnecessary updates +- Excludes boilerplate pages: `index.html`, `about.html`, `privacy.html`, `contact.html` + +--- + +### Database Scripts + +#### `init_db.py` +Initialize or reset the database + +**Parameters:** None (or "reset" argument) + +**Usage:** +```bash +# Initialize database +uv run python scripts/init_db.py + +# Reset database (drops all data) +uv run python scripts/init_db.py reset +``` + +--- + +#### `create_first_admin.py` +Create the first admin user in the system + +**Parameters:** None (interactive) + +**Usage:** +```bash +uv run python scripts/create_first_admin.py +``` + +--- + +#### `add_admin_direct.py` +Add an admin user directly to the database (bypasses authentication) + +**Parameters:** None (interactive) + +**Usage:** +```bash +uv run python scripts/add_admin_direct.py +``` + +**Use case:** When you're locked out or need to create an admin without existing admin 
credentials. + +--- + +#### `list_users.py` +List all users in the database (direct database access) + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/list_users.py +``` + +--- + +#### Migration Scripts +Database schema migrations for various features + +**Scripts:** +- `migrate_story_3.1_sqlite.py` - Site deployments +- `migrate_add_anchor_text.py` - Anchor text +- `migrate_add_template_fields.py` - Template fields +- `migrate_add_site_pages.py` - Site pages +- `migrate_add_deployment_fields.py` - Deployment fields +- `migrate_add_multi_cloud_storage_fields.py` - Multi-cloud storage fields +- `migrate_add_image_fields.py` - Image fields + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/migrate_story_3.1_sqlite.py +``` + +--- + +#### `backfill_site_pages.py` +Backfill site pages after running the site pages migration + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/backfill_site_pages.py +``` + +--- + +#### `check_migration.py` +Check the status of database migrations + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/check_migration.py +``` + +--- + +#### `add_tier_to_projects.py` +Add tier field to existing projects + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/add_tier_to_projects.py +``` + +--- + +### Content Management Scripts + +#### `check_last_gen.py` +Check the last generated content batch + +**Parameters:** None + +**Usage:** +```bash +uv run python check_last_gen.py +``` + +--- + +#### `check_project_url.py` +Check project URL + +**Parameters:** None (edit script for project_id) + +**Usage:** +```bash +# Edit the script to set project_id, then: +uv run python scripts/check_project_url.py +``` + +--- + +#### `check_job_last_run.py` +Check job last run + +**Parameters:** None (edit script for project_id) + +**Usage:** +```bash +# Edit the script to set project_id, then: +uv run python scripts/check_job_last_run.py +``` + +--- + +#### 
`fix_tier1_money_links.py` +Fix tier1 money links + +**Parameters:** None (edit script for project_id) + +**Usage:** +```bash +# Edit the script to set project_id, then: +uv run python scripts/fix_tier1_money_links.py +``` + +--- + +#### `investigate_tier1_links.py` +Investigate tier1 links + +**Parameters:** None (edit script for project_id) + +**Usage:** +```bash +# Edit the script to set project_id, then: +uv run python scripts/investigate_tier1_links.py +``` + +--- + +#### `list_t1_articles.py` +List tier1 articles + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/list_t1_articles.py +``` + +--- + +#### `check_orphaned_articles.py` +Check for orphaned articles + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/check_orphaned_articles.py +``` + +--- + +#### `fix_orphaned_articles.py` +Fix orphaned articles + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/fix_orphaned_articles.py +``` + +--- + +### Utility Scripts + +#### `generate_cli_docs.py` +Regenerate the CLI command reference documentation + +**Parameters:** None + +**Usage:** +```bash +uv run python scripts/generate_cli_docs.py +``` + +Generates `docs/CLI_COMMAND_REFERENCE.md` from Click command definitions. + +--- + +#### Test Scripts +Various testing and debugging scripts + +**Scripts:** +- `test_image_generation.py` - Test image generation +- `test_image_reinsertion.py` - Test image reinsertion +- `test_s3_upload.py` - Test S3 upload +- `test_s3_uploads_localtris.py` - Test S3 uploads for localtris +- `test_s3_real.py` - Test real S3 upload +- `test_story_6_3.py` - Test story 6.3 + +**Parameters:** Various (see individual scripts) + +--- + +## Suggested GUI Structure + +### Main Navigation + +#### 1. 
Dashboard +- System health status +- Recent activity feed +- Quick stats: + - Total projects + - Total sites + - Total content generated + - Recent deployments +- Quick actions: + - Create new project + - Provision new site + - Generate content + +--- + +#### 2. Projects +**List View:** +- Table of all projects +- Columns: ID, Name, Keyword, Created, Status +- Filters: By user, by date, by status +- Search: By name, keyword +- Actions: View, Edit, Delete, Create Job, Generate + +**Create Project:** +- Tab 1: CORA Ingestion + - File upload (drag & drop or file picker) + - Project name input + - Money site URL input + - Custom anchors (comma-separated) + - Authentication fields +- Tab 2: Simple Ingestion + - File upload + - Project name (optional, can override spreadsheet) + - Money site URL + - Authentication fields + +**Project Details:** +- Project information display +- Related content list +- Job files list +- Actions: + - Create job file + - Generate content + - Export links + - Edit project + +**Create Job File:** +- Project selector (dropdown) +- Deployment targets (multi-select with search) +- Tier 1 count (number input) +- Tier 2 count (number input) +- Model selection (dropdown) +- Output path (optional, file picker) +- Authentication fields + +--- + +#### 3. 
Sites +**List View:** +- Table of all sites +- Columns: ID, Site Name, Domain, Storage Zone, Region, Provider +- Filters: By provider, by region, by status +- Search: By name, domain +- Actions: View, Edit, Delete, Sync + +**Provision Site:** +- Site name input +- Domain input (FQDN) +- Storage name input (with uniqueness check) +- Region selector (dropdown: DE, NY, LA, SG, SYD) +- Authentication fields +- DNS configuration instructions display + +**Attach Domain:** +- Site name input +- Domain input (FQDN) +- Storage name selector (dropdown of existing) +- Authentication fields + +**Discover S3 Buckets:** +- Auto-import toggle +- Bucket list display +- Status indicators (Available, Registered, Excluded) +- Bulk selection +- Import button + +**Site Details:** +- Site information display +- Storage zone details +- Pull zone details +- DNS configuration +- Deployed content count +- Actions: Edit, Remove, View Content + +--- + +#### 4. Content +**List View:** +- Table of all generated content +- Columns: ID, Title, Project, Tier, Site, Status, Created +- Filters: By project, by tier, by site, by status +- Search: By title +- Actions: View, Deploy, Delete + +**Deploy Batch:** +- Batch/Project selector (dropdown) +- Continue on error (checkbox) +- Dry run (checkbox) +- Authentication fields +- Progress bar +- Results display + +**Verify Deployment:** +- Batch/Project selector (dropdown) +- Sample size (number input, optional) +- Timeout (number input, default: 10) +- Results display: + - Total checked + - Successful + - Failed (with details) + +**Content Details:** +- Content display (HTML preview) +- Metadata display +- Deployment status +- Actions: Edit, Redeploy, Delete + +--- + +#### 5. 
Maintenance +**Update Index Pages:** +- Provider filter (dropdown: All, S3, Bunny) +- Force update (checkbox) +- Limit (number input, optional) +- Hostname filter (text input, optional) +- Dry run (checkbox) +- Progress bar +- Results display + +**Add Robots.txt:** +- Provider filter (dropdown: All, S3, Bunny) +- Dry run (checkbox) +- Progress bar +- Results display + +**Verify Deployment:** +- Same as Content → Verify Deployment + +**Database Tools:** +- Initialize database (with confirmation) +- Reset database (with strong confirmation) +- Run migrations (list with status) +- Check migration status + +--- + +#### 6. Settings +**User Management:** +- List users (table) +- Add user form +- Edit user form +- Delete user (with confirmation) +- Role management + +**System Configuration:** +- Display current config +- API keys management (view/edit) +- Model selection +- Environment variables display + +**Authentication:** +- Login/logout +- Session management +- Remember credentials option + +--- + +## Key Forms/Components + +### Project Creation Form +**Fields:** +- File upload (CORA or Simple) + - Drag & drop zone + - File picker button + - File validation (must be .xlsx) +- Project name (text input, required) +- Money site URL (text input, URL validation) +- Custom anchors (text input, comma-separated, CORA only) +- Authentication (username/password, optional if session exists) + +**Validation:** +- File format check +- URL format validation +- Required field validation + +--- + +### Job Creation Form +**Fields:** +- Project selector (dropdown, required) +- Deployment targets (multi-select with search, optional) +- Tier 1 count (number input, default: 10) +- Tier 2 count (number input, default: 30) +- Model selection (dropdown, optional) +- Output path (file picker, optional) +- Authentication (username/password, optional if session exists) + +**Features:** +- Real-time validation +- Preview of job file structure +- Save as template option + +--- + +### Generate 
Batch Form +**Fields:** +- Job file selector (file picker, required) +- Model override (dropdown, optional) +- Debug mode (checkbox) +- Continue on error (checkbox) +- Authentication (username/password, optional if session exists) + +**Features:** +- Progress bar with percentage +- Live log streaming +- Cancel button +- Results summary + +--- + +### Deploy Batch Form +**Fields:** +- Batch/Project selector (dropdown, required) +- Continue on error (checkbox, default: checked) +- Dry run (checkbox) +- Authentication (username/password, optional if session exists) + +**Features:** +- Progress bar +- File-by-file status +- Results summary +- Error details + +--- + +### Site Provision Form +**Fields:** +- Site name (text input, required) +- Domain (text input, FQDN validation, required) +- Storage name (text input, required, uniqueness check) +- Region selector (dropdown, required) +- Authentication (username/password, optional if session exists) + +**Features:** +- Real-time domain validation +- Storage name uniqueness check +- DNS configuration instructions display +- Copy DNS settings button + +--- + +### Maintenance Tools + +#### Update Index Pages +**Fields:** +- Provider filter (dropdown: All, S3, Bunny) +- Force update (checkbox) +- Limit (number input, optional) +- Hostname filter (text input, optional) +- Dry run (checkbox) + +**Features:** +- Progress bar +- Site-by-site status +- Results summary + +#### Add Robots.txt +**Fields:** +- Provider filter (dropdown: All, S3, Bunny) +- Dry run (checkbox) + +**Features:** +- Progress bar +- Results summary + +--- + +## Implementation Notes + +### 1. Authentication +Many commands require `--username`/`--password`. 
Consider: +- **Session-based authentication** in GUI + - Login page with remember option + - JWT or session tokens + - Auto-refresh tokens +- **Remember credentials** option + - Secure storage (encrypted local storage) + - Optional "remember me" checkbox +- **Admin vs User role handling** + - Role-based UI visibility + - Permission checks before actions + - Clear error messages for unauthorized actions + +--- + +### 2. File Operations +- **File upload** for CORA/Simple ingestion + - Drag & drop interface + - File picker fallback + - Progress indicator + - File size validation + - Format validation (.xlsx only) +- **File picker** for job files + - Recent files list + - File browser + - File validation (JSON format) +- **File download** for link exports + - CSV export + - JSON export options + - Download button with progress + +--- + +### 3. Real-time Updates +- **Progress bars** for generation/deployment + - Percentage complete + - Current item being processed + - Estimated time remaining + - Cancel button +- **Live status updates** + - WebSocket or polling for status + - Real-time log streaming + - Status badges (Pending, Processing, Complete, Failed) +- **Log streaming** + - Scrollable log viewer + - Auto-scroll to bottom + - Filter by log level + - Export logs option + +--- + +### 4. Validation +- **File format validation** before upload + - Check file extension + - Validate file structure (for spreadsheets) + - Show clear error messages +- **URL/domain validation** + - Real-time validation as user types + - Format checking (http://, https://) + - Domain format validation +- **Job file JSON validation** + - Schema validation + - Syntax checking + - Required field checking + - Clear error messages with line numbers + +--- + +### 5. 
Error Handling +- **Display error messages clearly** + - User-friendly error messages + - Technical details in expandable section + - Error codes for support +- **Retry mechanisms** + - Retry button for failed operations + - Automatic retry with exponential backoff + - Max retry limit +- **Continue-on-error options** + - Checkbox for batch operations + - Summary of successes and failures + - Detailed error log + +--- + +### 6. Data Display +- **Tables** for projects, sites, content + - Sortable columns + - Filterable rows + - Pagination for large datasets + - Row selection (multi-select) + - Bulk actions +- **Filters and search** + - Text search with highlighting + - Advanced filters (date range, status, etc.) + - Saved filter presets +- **Pagination** + - Page size selector + - Page navigation + - Total count display +- **Export to CSV/JSON** + - Export current view (with filters applied) + - Export all data + - Custom field selection + +--- + +### 7. User Experience Enhancements +- **Confirmation dialogs** for destructive actions + - Delete operations + - Reset database + - Remove sites +- **Loading states** + - Skeleton loaders + - Spinners for buttons + - Disable forms during submission +- **Success notifications** + - Toast notifications + - Success banners + - Confirmation messages +- **Keyboard shortcuts** + - Quick navigation + - Common actions (Ctrl+S to save, etc.) +- **Responsive design** + - Mobile-friendly layouts + - Tablet optimization + - Desktop-first with mobile fallbacks + +--- + +### 8. Performance Considerations +- **Lazy loading** for large lists + - Virtual scrolling + - Infinite scroll option + - Load on demand +- **Caching** + - Cache project/site lists + - Cache user session + - Cache configuration +- **Optimistic updates** + - Update UI immediately + - Rollback on error +- **Debouncing** for search/filter inputs + - Reduce API calls + - Improve performance + +--- + +### 9. 
Accessibility +- **Keyboard navigation** + - Tab order + - Focus management + - Keyboard shortcuts +- **Screen reader support** + - ARIA labels + - Semantic HTML + - Alt text for images +- **Color contrast** + - WCAG AA compliance + - High contrast mode +- **Error announcements** + - Screen reader announcements + - Error message association + +--- + +## Technology Recommendations + +### Frontend Framework Options +1. **React + TypeScript** + - Component library: Material-UI or Ant Design + - State management: Redux or Zustand + - Forms: React Hook Form + - HTTP client: Axios + +2. **Vue.js + TypeScript** + - Component library: Vuetify or Element Plus + - State management: Pinia + - Forms: Vee-Validate + - HTTP client: Axios + +3. **Next.js (React)** + - Full-stack option + - Server-side rendering + - API routes + +### Backend Integration +- **REST API** (if creating new API endpoints) + - FastAPI (Python) to match existing codebase + - OpenAPI/Swagger documentation +- **Direct CLI execution** (simpler initial approach) + - Execute CLI commands from backend + - Parse output + - Return structured data + +### Additional Tools +- **File upload**: react-dropzone or similar +- **Tables**: react-table or ag-grid +- **Charts**: Chart.js or Recharts +- **Notifications**: react-toastify or similar +- **Logs**: react-terminal or xterm.js + +--- + +## Next Steps + +1. **Choose technology stack** +2. **Set up project structure** +3. **Create API layer** (if needed) or CLI wrapper +4. **Build authentication system** +5. **Implement core pages** (Dashboard, Projects, Sites) +6. **Add forms and validation** +7. **Implement real-time updates** +8. **Add error handling and notifications** +9. **Testing and refinement** +10. 
**Deployment** + +--- + +## Related Documentation + +- [CLI Command Reference](./CLI_COMMAND_REFERENCE.md) - Detailed CLI documentation +- [Job Schema](./job-schema.md) - Job file structure and configuration +- [Architecture](./architecture.md) - System architecture overview +- [Scripts README](../scripts/README.md) - Scripts documentation diff --git a/docs/job-schema.md b/docs/job-schema.md index 58d0f61..664d347 100644 --- a/docs/job-schema.md +++ b/docs/job-schema.md @@ -142,11 +142,13 @@ Each tier in the `tiers` object defines content generation parameters for that s - **Type**: `boolean` (optional, default: `false`) - **Purpose**: Auto-create sites when available pool is insufficient - **Behavior**: Creates generic sites using project keyword as prefix +- **Status**: ⚠️ **NOT IMPLEMENTED** - Parsed but does not function ### `create_sites_for_keywords` - **Type**: `Array<Object>` (optional) - **Purpose**: Pre-create sites for specific keywords before assignment - **Structure**: Each object must have `keyword` (string) and `count` (integer) +- **Status**: ⚠️ **NOT IMPLEMENTED** - Parsed but does not function #### Keyword Site Creation Object | Field | Type | Required | Description | @@ -190,14 +192,7 @@ Each tier in the `tiers` object defines content generation parameters for that s | `outline` | `string` | Model to use for outline generation | | `content` | `string` | Model to use for content generation | -### Available Models (from master.config.json) -- `anthropic/claude-sonnet-4.5` (Claude Sonnet 4.5) -- `anthropic/claude-3.5-sonnet` (Claude 3.5 Sonnet) -- `openai/gpt-4o` (GPT-4 Optimized) -- `openai/gpt-4o-mini` (GPT-4 Mini) -- `meta-llama/llama-3.1-70b-instruct` (Llama 3.1 70B) -- `meta-llama/llama-3.1-8b-instruct` (Llama 3.1 8B) -- `google/gemini-2.5-flash` (Gemini 2.5 Flash) + ### Example ```json @@ -249,6 +244,9 @@ Each tier in the `tiers` object defines content generation parameters for that s - **Type**: `Object` (optional) - **Purpose**: Configures 
how many tiered links to generate per article - **Default**: `{"min": 2, "max": 4}` if not specified +- **Behavior**: + - Tier1: Always 1 link to the money site (this setting is ignored) + - Tier2+: A random number of links, between min and max, to lower-tier articles #### Tiered Link Range Object | Field | Type | Required | Description | @@ -266,6 +264,141 @@ Each tier in the `tiers` object defines content generation parameters for that s } ``` +## Interlinking Configuration (Story 3.3) + +### `interlinking` +- **Type**: `Object` (optional) +- **Purpose**: Configures internal linking behavior within articles +- **Can be set at**: Job level (all tiers) or tier level (specific tier) +- **Tier-level override**: Tier-level config overrides job-level for that tier + +#### Interlinking Object Fields +| Field | Type | Description | +|-------|------|-------------| +| `links_per_article_min` | `integer` | Minimum number of tiered links (same as `tiered_link_count_range.min`) | +| `links_per_article_max` | `integer` | Maximum number of tiered links (same as `tiered_link_count_range.max`) | +| `see_also_min` | `integer` | Minimum number of "See Also" links to same-tier articles (default: 4) | +| `see_also_max` | `integer` | Maximum number of "See Also" links to same-tier articles (default: 5) | + +### Example +```json +{ + "interlinking": { + "links_per_article_min": 2, + "links_per_article_max": 4, + "see_also_min": 4, + "see_also_max": 5 + } +} +``` + +**Behavior:** +- `links_per_article_min/max`: Controls how many links to lower tier articles +- `see_also_min/max`: Controls how many "See Also" links to randomly selected articles from the same tier + +## Anchor Text Configuration (Story 8.1) + +### `anchor_text_config` +- **Type**: `Object` (optional) +- **Purpose**: Controls anchor text selection for tiered links +- **Can be set at**: Job level (all tiers) or tier level (specific tier) +- **Tier-level override**: Tier-level config overrides job-level for that tier + +#### Anchor Text Config Modes 
+Explicit mode is well suited to branded anchor text: repeat the company name in the term list as many times as needed to reach the desired percentage of branded anchors. +| Mode | Description | +|------|-------------| +| `default` | Use master.config.json tier rules (main_keyword for tier1, related_searches for tier2+) | +| `override` | Replace tier rules with `custom_text` array | +| `append` | Add `custom_text` array to tier rules | +| `explicit` | Use only explicitly provided terms (no algorithm-generated terms) | + +#### Anchor Text Config Object (Job Level) +| Field | Type | Description | +|-------|------|-------------| +| `mode` | `string` | One of: "default", "override", "append", "explicit" | +| `custom_text` | `Array<string>` | Custom anchor text terms (for override/append modes) | +| `tier1` | `Array<string>` | Explicit terms for tier1 (for explicit mode) | +| `tier2` | `Array<string>` | Explicit terms for tier2 (for explicit mode) | +| `tier3` | `Array<string>` | Explicit terms for tier3 (for explicit mode) | +| `tier4_plus` | `Array<string>` | Explicit terms for tier4+ (for explicit mode) | + +#### Anchor Text Config Object (Tier Level) +| Field | Type | Description | +|-------|------|-------------| +| `mode` | `string` | One of: "default", "override", "append", "explicit" | +| `custom_text` | `Array<string>` | Custom anchor text terms (for override/append modes) | +| `terms` | `Array<string>` | Explicit terms for this tier (for explicit mode) | + +### Examples + +**Default mode (use tier rules):** +```json +{ + "anchor_text_config": { + "mode": "default" + } +} +``` + +**Override mode (replace with custom text):** +```json +{ + "anchor_text_config": { + "mode": "override", + "custom_text": ["custom term 1", "custom term 2"] + } +} +``` + +**Explicit mode (job level):** +```json +{ + "anchor_text_config": { + "mode": "explicit", + "tier1": ["high volume", "precision machining", "custom manufacturing"], + "tier2": ["high volume production", "bulk manufacturing", "large scale"] + } 
+} +``` + +**Explicit mode (tier level override):** +```json +{ + "tiers": { + "tier1": { + "count": 12, + "anchor_text_config": { + "mode": "explicit", + "terms": ["high volume", "precision"] + } + } + } +} +``` + +**Explicit mode with branded anchor text ratio (generated via ingest-cora):** +When using `ingest-cora` with `--tier1-branded-ratio`, the system automatically generates an explicit anchor text list with the specified ratio of branded terms. For example, with a 75% ratio and branded text "Acme Corp", the generated config might look like: +```json +{ + "tiers": { + "tier1": { + "count": 10, + "anchor_text_config": { + "mode": "explicit", + "terms": ["Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "Acme Corp", "main keyword", "learn about main keyword", "main keyword guide", "best main keyword", "main keyword tips"] + } + } + } +} +``` +This achieves 75% branded (15/20) and 25% keyword-based (5/20) anchor text selection. 
+ +**Behavior:** +- System tries to find provided terms in content first, then inserts if not found +- When using "explicit" mode, only the provided terms are used (no algorithm-generated terms) +- Tier-level explicit config takes precedence over job-level for that tier + ## Complete Example ```json diff --git a/src/cli/commands.py b/src/cli/commands.py index 286c532..ae9ee71 100644 --- a/src/cli/commands.py +++ b/src/cli/commands.py @@ -9,6 +9,7 @@ from src.auth.service import AuthService from src.database.session import db_manager from src.database.repositories import UserRepository, SiteDeploymentRepository, ProjectRepository from src.database.models import User +from src.interlinking.anchor_text_generator import AnchorTextGenerator from src.deployment.bunnynet import ( BunnyNetClient, BunnyNetAPIError, @@ -36,7 +37,13 @@ from datetime import datetime load_dotenv() -def create_job_file_for_project(project_id: int, project_name: str, session) -> Optional[str]: +def create_job_file_for_project( + project_id: int, + project_name: str, + session, + tier1_branded_ratio: Optional[float] = None, + tier1_branded_text: Optional[str] = None +) -> Optional[str]: """ Create a job JSON file for a newly created project. 
@@ -44,6 +51,8 @@ def create_job_file_for_project(project_id: int, project_name: str, session) -> project_id: The ID of the created project project_name: The name of the project (for filename) session: Database session + tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0) + tier1_branded_text: Optional branded anchor text (company name) for tier1 Returns: Path to created file, or None if creation failed @@ -81,22 +90,50 @@ def create_job_file_for_project(project_id: int, project_name: str, session) -> base_filename = f"{sanitized_name}-{date_suffix}.json" filepath = jobs_dir / base_filename + # Build tier1 configuration + tier1_config = { + "count": t1_count, + "min_word_count": 1250, + "max_word_count": 2000, + "models": { + "title": "openai/gpt-4o-mini", + "outline": "openai/gpt-4o-mini", + "content": "x-ai/grok-4-fast" + } + } + + # Add anchor_text_config if branded ratio and text are provided + if tier1_branded_ratio is not None and tier1_branded_text: + # Get project to retrieve main_keyword for non-branded terms + project_repo = ProjectRepository(session) + project = project_repo.get_by_id(project_id) + + if project and project.main_keyword: + # Generate keyword variations for non-branded terms + anchor_generator = AnchorTextGenerator() + keyword_variations = anchor_generator._generate_from_keyword(project, 10) + + # Calculate term distribution (use 20 terms for good distribution) + total_terms = 20 + branded_count = int(total_terms * tier1_branded_ratio) + keyword_count = total_terms - branded_count + + # Create anchor text list with branded terms and keyword variations + anchor_terms = [tier1_branded_text] * branded_count + anchor_terms.extend(keyword_variations[:keyword_count]) + + tier1_config["anchor_text_config"] = { + "mode": "explicit", + "terms": anchor_terms + } + job_template = { "jobs": [ { "project_id": project_id, "deployment_targets": selected_domains, "tiers": { - "tier1": { - "count": t1_count, - "min_word_count": 
1250, - "max_word_count": 2000, - "models": { - "title": "openai/gpt-4o-mini", - "outline": "openai/gpt-4o-mini", - "content": "x-ai/grok-4-fast" - } - }, + "tier1": tier1_config, "tier2": { "count": t2_count, "min_word_count": 1000, @@ -943,9 +980,10 @@ def sync_sites(admin_user: Optional[str], admin_password: Optional[str], dry_run @click.option('--name', '-n', required=True, help='Project name') @click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)') @click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)') +@click.option('--tier1-branded-ratio', default=0.75, type=float, help='Ratio of branded anchor text for tier1 (default: 0.75)') @click.option('--username', '-u', help='Username for authentication') @click.option('--password', '-p', help='Password for authentication') -def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], username: Optional[str], password: Optional[str]): +def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], tier1_branded_ratio: float, username: Optional[str], password: Optional[str]): """Ingest a CORA .xlsx report and create a new project""" try: if not username or not password: @@ -1014,7 +1052,25 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom if project.custom_anchor_text: click.echo(f"Custom Anchor Text: {', '.join(project.custom_anchor_text)}") - job_file = create_job_file_for_project(project.id, project.name, session) + # Handle tier1 branded anchor text if ratio is specified + tier1_branded_text = None + if tier1_branded_ratio is not None and tier1_branded_ratio > 0: + tier1_branded_text = click.prompt( + "\nEnter branded anchor text (company name) for tier1", + type=str + ).strip() + if not tier1_branded_text: + click.echo("Warning: Empty branded anchor text provided, skipping tier1 branded anchor text configuration.", 
err=True) + tier1_branded_text = None + tier1_branded_ratio = None + + job_file = create_job_file_for_project( + project.id, + project.name, + session, + tier1_branded_ratio=tier1_branded_ratio, + tier1_branded_text=tier1_branded_text + ) if job_file: click.echo(f"Job file created: {job_file}") @@ -1193,6 +1249,133 @@ def list_projects(username: Optional[str], password: Optional[str]): raise click.Abort() +@app.command("create-job") +@click.option('--project-id', '-p', required=True, type=int, help='Project ID to create job file for') +@click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)') +@click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)') +@click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)') +@click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)') +@click.option('--username', '-u', help='Username for authentication') +@click.option('--password', '-pwd', help='Password for authentication') +def create_job( + project_id: int, + deployment_targets: tuple, + tier1_count: int, + tier2_count: int, + output: Optional[str], + username: Optional[str], + password: Optional[str] +): + """Create a job file from an existing project ID""" + try: + if not username or not password: + username, password = prompt_admin_credentials() + + session = db_manager.get_session() + try: + user_repo = UserRepository(session) + auth_service = AuthService(user_repo) + + user = auth_service.authenticate_user(username, password) + if not user: + click.echo("Error: Authentication failed", err=True) + raise click.Abort() + + project_repo = ProjectRepository(session) + project = project_repo.get_by_id(project_id) + + if not project: + click.echo(f"Error: Project {project_id} not found", err=True) + raise click.Abort() + + deployment_targets_list = list(deployment_targets) if 
deployment_targets else None + + if not deployment_targets_list: + site_repo = SiteDeploymentRepository(session) + sites = site_repo.get_all() + available_domains = [ + site.custom_hostname + for site in sites + if site.custom_hostname is not None + ] + if available_domains: + click.echo(f"Available sites: {', '.join(available_domains[:5])}{'...' if len(available_domains) > 5 else ''}") + click.echo("Note: No deployment_targets specified. You can add them manually to the job file.") + + sanitized_name = "".join(c if c.isalnum() or c in ('-', '_') else '-' for c in project.name.lower()).strip('-') + sanitized_name = '-'.join(sanitized_name.split()) + + jobs_dir = Path("jobs") + jobs_dir.mkdir(exist_ok=True) + + if output: + filepath = Path(output) + else: + base_filename = f"{sanitized_name}.json" + filepath = jobs_dir / base_filename + + if filepath.exists(): + date_suffix = datetime.now().strftime("%y%m%d") + base_filename = f"{sanitized_name}-{date_suffix}.json" + filepath = jobs_dir / base_filename + + job_template = { + "jobs": [ + { + "project_id": project_id, + "tiers": { + "tier1": { + "count": tier1_count, + "min_word_count": 1250, + "max_word_count": 2000, + "models": { + "title": "openai/gpt-4o-mini", + "outline": "openai/gpt-4o-mini", + "content": "anthropic/claude-3.5-sonnet" + } + }, + "tier2": { + "count": tier2_count, + "min_word_count": 1000, + "max_word_count": 1250, + "models": { + "title": "openai/gpt-4o-mini", + "outline": "openai/gpt-4o-mini", + "content": "openai/gpt-4o-mini" + }, + "interlinking": { + "links_per_article_min": 3, + "links_per_article_max": 6 + } + } + } + } + ] + } + + if deployment_targets_list: + job_template["jobs"][0]["deployment_targets"] = deployment_targets_list + + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(job_template, f, indent=2) + + click.echo(f"\nJob file created: {filepath}") + click.echo(f"Project: {project.name} (ID: {project_id})") + click.echo(f"Tier1: {tier1_count} articles") + 
click.echo(f"Tier2: {tier2_count} articles") + if deployment_targets_list: + click.echo(f"Deployment targets: {', '.join(deployment_targets_list)}") + click.echo(f"\nTo run this job:") + click.echo(f" uv run python main.py generate-batch --job-file {filepath} -u {username} --password <password>") + + finally: + session.close() + + except Exception as e: + click.echo(f"Error creating job file: {e}", err=True) + raise click.Abort() + + @app.command("generate-batch") @click.option('--job-file', '-j', required=True, type=click.Path(exists=True), help='Path to job JSON file')