From de21a22b72ef2ebe83263d4eef25fed346ef0ea4 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Fri, 19 Dec 2025 12:43:01 -0600 Subject: [PATCH] Implement Story 8.1: Job-Level Anchor Text Control for T1 and T2+ - Add explicit anchor text mode support in AnchorTextConfig - Support tier-specific anchor text terms at job-level (tier1, tier2, tier3, tier4_plus) - Support tier-level explicit anchor text with 'terms' array - Update content injection to prioritize explicit terms when mode is 'explicit' - Add validation for explicit mode requiring term lists - Update JOB_FIELD_REFERENCE.md with explicit mode documentation and examples - Add comprehensive unit and integration tests for explicit anchor text Includes multi-cloud storage migration script and related database changes. --- JOB_FIELD_REFERENCE.md | 60 ++++++- docs/prd/epic-8-functional-debt.md | 51 ++++++ .../story-8.1-job-anchor-text-control.md | 101 +++++++++++ .../migrate_add_multi_cloud_storage_fields.py | 142 +++++++++++++++ src/database/interfaces.py | 7 +- src/database/models.py | 9 +- src/database/repositories.py | 19 +- src/generation/job_config.py | 82 +++++++-- src/interlinking/content_injection.py | 21 +++ .../test_content_injection_integration.py | 84 +++++++++ tests/unit/test_content_injection.py | 87 ++++++++++ tests/unit/test_job_config.py | 162 +++++++++++++++++- 12 files changed, 806 insertions(+), 19 deletions(-) create mode 100644 docs/prd/epic-8-functional-debt.md create mode 100644 docs/stories/story-8.1-job-anchor-text-control.md create mode 100644 scripts/migrate_add_multi_cloud_storage_fields.py diff --git a/JOB_FIELD_REFERENCE.md b/JOB_FIELD_REFERENCE.md index 5098353..8ed9db3 100644 --- a/JOB_FIELD_REFERENCE.md +++ b/JOB_FIELD_REFERENCE.md @@ -11,7 +11,8 @@ auto_create_sites - Boolean (NOT IMPLEMENTED - parsed but doesn't wor create_sites_for_keywords - Array of {keyword, count} objects (NOT IMPLEMENTED - parsed but doesn't work) models - {title, outline, content} with model strings 
tiered_link_count_range - {min, max} integers -anchor_text_config - {mode, custom_text} +anchor_text_config - {mode, custom_text, tier1, tier2, tier3, tier4_plus} + - For "explicit" mode, use tier-specific arrays (tier1, tier2, etc.) instead of custom_text failure_config - {max_consecutive_failures, skip_on_failure} interlinking - {links_per_article_min, links_per_article_max, see_also_min, see_also_max} tiers - Required, object with tier1/tier2/tier3 @@ -28,7 +29,8 @@ min_h3_tags - Integer max_h3_tags - Integer models - {title, outline, content} - overrides job-level interlinking - {links_per_article_min, links_per_article_max, see_also_min, see_also_max} - overrides job-level -anchor_text_config - {mode, custom_text} - overrides job-level for this tier only +anchor_text_config - {mode, custom_text, terms} - overrides job-level for this tier only + - For "explicit" mode, use "terms" array instead of "custom_text" ``` ## Field Behaviors @@ -43,6 +45,9 @@ anchor_text_config - {mode, custom_text} - overrides job-level for this tier - "default" = Use master.config.json tier rules - "override" = Replace with custom_text - "append" = Add custom_text to tier rules +- "explicit" = Use only explicitly provided terms (no algorithm-generated terms) + - Job-level: Provide tier1, tier2, tier3, tier4_plus arrays with terms + - Tier-level: Provide terms array for that specific tier - Tier-level config overrides job-level config for that tier **tiered_link_count_range**: How many links to lower tier @@ -161,3 +166,54 @@ If not specified, these defaults apply: - Tier2: Uses related_searches from project - Can override with anchor_text_config +## Explicit Anchor Text Example + +Use "explicit" mode to specify exact anchor text terms for each tier: + +```json +{ + "jobs": [{ + "project_id": 26, + "anchor_text_config": { + "mode": "explicit", + "tier1": ["high volume", "precision machining", "custom manufacturing"], + "tier2": ["high volume production", "bulk manufacturing", "large 
scale"] + }, + "tiers": { + "tier1": {"count": 12}, + "tier2": {"count": 38} + } + }] +} +``` + +Or use tier-level explicit config to override job-level for a specific tier: + +```json +{ + "jobs": [{ + "project_id": 26, + "anchor_text_config": { + "mode": "explicit", + "tier1": ["high volume", "precision machining"], + "tier2": ["bulk manufacturing"] + }, + "tiers": { + "tier1": { + "count": 12, + "anchor_text_config": { + "mode": "explicit", + "terms": ["high volume", "precision"] + } + }, + "tier2": {"count": 38} + } + }] +} +``` + +When using "explicit" mode, the system will: +- Use only the provided terms (no algorithm-generated terms) +- Try to find these terms in content first, then insert if not found +- Tier-level explicit config takes precedence over job-level for that tier + diff --git a/docs/prd/epic-8-functional-debt.md b/docs/prd/epic-8-functional-debt.md new file mode 100644 index 0000000..4cee6b5 --- /dev/null +++ b/docs/prd/epic-8-functional-debt.md @@ -0,0 +1,51 @@ +# Epic 8: Functional Debt + +## Epic Goal +To address functional limitations and gaps in the system that prevent users from achieving specific business requirements, particularly around customization and control of content generation and linking behavior. + +## Rationale +While the system provides automated content generation with sensible defaults, there are cases where users need explicit control over specific aspects (like anchor text terms) that don't fit the standard algorithmic approach. This epic addresses these functional gaps to provide the flexibility needed for real-world use cases. 
+ +## Status +- **Story 8.1**: 🔄 PLANNING (Job-Level Anchor Text Control) + +## Stories + +### Story 8.1: Job-Level Anchor Text Control for T1 and T2+ +**Estimated Effort**: 2 story points + +**As a user**, I want to explicitly specify anchor text terms in my job configuration for both Tier 1 and Tier 2+ links, so that I can include specific terms (like "high volume") that aren't covered by the standard algorithm. + +**Acceptance Criteria**: +* Job JSON supports explicit anchor text configuration for both Tier 1 and Tier 2+ +* Anchor text can be specified at job-level (applies to all tiers) or tier-level (tier-specific) +* When explicit anchor text is provided, it should be used in addition to or instead of algorithm-generated anchor text +* Support for multiple anchor text terms per tier +* Anchor text terms are used when injecting links (tiered links, homepage links, etc.) +* If explicit anchor text is provided, it takes precedence over algorithm-generated terms +* Backward compatible: existing jobs without explicit anchor text continue to work with current algorithm + +**Technical Notes**: +* Extend `anchor_text_config` in job JSON to support explicit term lists +* Update `_get_anchor_texts_for_tier()` in `content_injection.py` to prioritize explicit terms +* Consider adding a new mode like "explicit" that uses only the provided terms, or extend "override" mode +* Document in `JOB_FIELD_REFERENCE.md` how to use explicit anchor text + +**Example Job Configuration**: +```json +{ + "jobs": [{ + "project_id": 26, + "anchor_text_config": { + "mode": "explicit", + "tier1": ["high volume", "precision machining", "custom manufacturing"], + "tier2": ["high volume production", "bulk manufacturing", "large scale"] + }, + "tiers": { + "tier1": {"count": 12}, + "tier2": {"count": 38} + } + }] +} +``` + diff --git a/docs/stories/story-8.1-job-anchor-text-control.md b/docs/stories/story-8.1-job-anchor-text-control.md new file mode 100644 index 0000000..995bb02 --- /dev/null 
+++ b/docs/stories/story-8.1-job-anchor-text-control.md @@ -0,0 +1,101 @@ +# Story 8.1: Job-Level Anchor Text Control for T1 and T2+ + +## Story Details +**As a user**, I want to explicitly specify anchor text terms in my job configuration for both Tier 1 and Tier 2+ links, so that I can include specific terms (like "high volume") that aren't covered by the standard algorithm. + +## Acceptance Criteria + +### 1. Job JSON Configuration Support +**Status:** TODO + +- Job JSON supports explicit anchor text configuration for both Tier 1 and Tier 2+ +- Anchor text can be specified at job-level (applies to all tiers) or tier-level (tier-specific) +- Support for multiple anchor text terms per tier +- Configuration format is intuitive and well-documented + +**Example Format:** +```json +{ + "jobs": [{ + "project_id": 26, + "anchor_text_config": { + "mode": "explicit", + "tier1": ["high volume", "precision machining", "custom manufacturing"], + "tier2": ["high volume production", "bulk manufacturing", "large scale"] + }, + "tiers": { + "tier1": { + "count": 12, + "anchor_text_config": { + "mode": "explicit", + "terms": ["high volume", "precision"] + } + }, + "tier2": {"count": 38} + } + }] +} +``` + +### 2. Anchor Text Priority and Usage +**Status:** TODO + +- When explicit anchor text is provided, it should be used instead of algorithm-generated anchor text +- Explicit anchor text takes precedence over algorithm-generated terms +- Anchor text terms are used when injecting links (tiered links, homepage links, money site links, etc.) +- System tries to find explicit terms in content first, then falls back to insertion if not found + +### 3. Backward Compatibility +**Status:** TODO + +- Existing jobs without explicit anchor text continue to work with current algorithm +- Default behavior unchanged when no explicit anchor text is provided +- All existing anchor text modes ("default", "override", "append") continue to work + +### 4. 
Implementation Details +**Status:** TODO + +- Extend `AnchorTextConfig` dataclass in `src/generation/job_config.py` to support tier-specific term lists +- Update `_parse_job()` and `_parse_tier()` methods to parse explicit anchor text configuration +- Update `_get_anchor_texts_for_tier()` in `src/interlinking/content_injection.py` to prioritize explicit terms +- Add validation to ensure explicit terms are provided when mode is "explicit" +- Update `JOB_FIELD_REFERENCE.md` with documentation and examples + +### 5. Testing +**Status:** TODO + +- Unit tests for parsing explicit anchor text configuration +- Integration tests verifying explicit anchor text is used in link injection +- Tests for tier-level override of job-level anchor text +- Tests for backward compatibility with existing configurations + +## Technical Implementation + +### Changes Required + +1. **Job Config Parser** (`src/generation/job_config.py`): + - Extend `AnchorTextConfig` to support `tier1`, `tier2`, etc. fields + - Update parsing logic to handle tier-specific anchor text lists + - Add validation for explicit mode requiring term lists + +2. **Content Injection** (`src/interlinking/content_injection.py`): + - Update `_get_anchor_texts_for_tier()` to check for explicit terms first + - If explicit terms exist, use them; otherwise fall back to current algorithm + - Support both job-level and tier-level explicit anchor text + +3. **Documentation**: + - Update `JOB_FIELD_REFERENCE.md` with new anchor text configuration options + - Add examples showing explicit anchor text usage + +## Example Use Cases + +1. **Wide Variety**: User wants to link to a page using several different terms that aren't in related_searches or main_keyword variations +2. **Brand-Specific Terms**: User wants to use specific branded terms that aren't algorithmically generated +3. 
**Industry-Specific Jargon**: Terms that are important for SEO but don't appear in standard keyword extraction + +## Dependencies +- None (standalone feature) + +## Related Stories +- Story 3.3: Content Interlinking Injection (existing anchor text system) + diff --git a/scripts/migrate_add_multi_cloud_storage_fields.py b/scripts/migrate_add_multi_cloud_storage_fields.py new file mode 100644 index 0000000..015e346 --- /dev/null +++ b/scripts/migrate_add_multi_cloud_storage_fields.py @@ -0,0 +1,142 @@ +""" +Database migration script to add multi-cloud storage fields to site_deployments table +Story 6.3: Database Schema Updates for Multi-Cloud + +Adds: +- storage_provider (String(20), Not Null, Default: 'bunny', Indexed) +- s3_bucket_name (String(255), Nullable) +- s3_bucket_region (String(50), Nullable) +- s3_custom_domain (String(255), Nullable) +- s3_endpoint_url (String(500), Nullable) + +Usage: + python scripts/migrate_add_multi_cloud_storage_fields.py +""" + +import sys +from pathlib import Path + +project_root = Path(__file__).parent.parent +sys.path.insert(0, str(project_root)) + +from sqlalchemy import text +from src.database.session import db_manager +from src.core.config import get_config + + +def migrate(): + """Add multi-cloud storage fields to site_deployments table""" + print("Starting migration: add multi-cloud storage fields to site_deployments...") + + try: + config = get_config() + print(f"Database URL: {config.database.url}") + except Exception as e: + print(f"Error loading configuration: {e}") + sys.exit(1) + + try: + db_manager.initialize() + engine = db_manager.get_engine() + + with engine.connect() as conn: + print("Checking for existing columns...") + + result = conn.execute(text("PRAGMA table_info(site_deployments)")) + existing_columns = [row[1] for row in result] + print(f"Existing columns: {', '.join(existing_columns)}") + + migrations_applied = [] + + if "storage_provider" not in existing_columns: + print("Adding storage_provider 
column...") + conn.execute(text(""" + ALTER TABLE site_deployments + ADD COLUMN storage_provider VARCHAR(20) NOT NULL DEFAULT 'bunny' + """)) + conn.commit() + + print("Setting storage_provider='bunny' for all existing records...") + conn.execute(text(""" + UPDATE site_deployments + SET storage_provider = 'bunny' + """)) + conn.commit() + + print("Creating index on storage_provider...") + conn.execute(text(""" + CREATE INDEX IF NOT EXISTS idx_site_deployments_storage_provider + ON site_deployments(storage_provider) + """)) + conn.commit() + migrations_applied.append("storage_provider") + else: + print("storage_provider column already exists, skipping") + + if "s3_bucket_name" not in existing_columns: + print("Adding s3_bucket_name column...") + conn.execute(text(""" + ALTER TABLE site_deployments + ADD COLUMN s3_bucket_name VARCHAR(255) + """)) + conn.commit() + migrations_applied.append("s3_bucket_name") + else: + print("s3_bucket_name column already exists, skipping") + + if "s3_bucket_region" not in existing_columns: + print("Adding s3_bucket_region column...") + conn.execute(text(""" + ALTER TABLE site_deployments + ADD COLUMN s3_bucket_region VARCHAR(50) + """)) + conn.commit() + migrations_applied.append("s3_bucket_region") + else: + print("s3_bucket_region column already exists, skipping") + + if "s3_custom_domain" not in existing_columns: + print("Adding s3_custom_domain column...") + conn.execute(text(""" + ALTER TABLE site_deployments + ADD COLUMN s3_custom_domain VARCHAR(255) + """)) + conn.commit() + migrations_applied.append("s3_custom_domain") + else: + print("s3_custom_domain column already exists, skipping") + + if "s3_endpoint_url" not in existing_columns: + print("Adding s3_endpoint_url column...") + conn.execute(text(""" + ALTER TABLE site_deployments + ADD COLUMN s3_endpoint_url VARCHAR(500) + """)) + conn.commit() + migrations_applied.append("s3_endpoint_url") + else: + print("s3_endpoint_url column already exists, skipping") + + if 
migrations_applied: + print(f"\nMigration complete! Added columns: {', '.join(migrations_applied)}") + print("\nNew fields added:") + print(" - storage_provider (VARCHAR(20), NOT NULL, DEFAULT 'bunny', indexed)") + print(" - s3_bucket_name (VARCHAR(255), nullable)") + print(" - s3_bucket_region (VARCHAR(50), nullable)") + print(" - s3_custom_domain (VARCHAR(255), nullable)") + print(" - s3_endpoint_url (VARCHAR(500), nullable)") + else: + print("\nNo migrations needed - all columns already exist") + + except Exception as e: + print(f"Error during migration: {e}") + import traceback + traceback.print_exc() + sys.exit(1) + finally: + db_manager.close() + + +if __name__ == "__main__": + migrate() + diff --git a/src/database/interfaces.py b/src/database/interfaces.py index 800cf76..d3e2d4d 100644 --- a/src/database/interfaces.py +++ b/src/database/interfaces.py @@ -59,7 +59,12 @@ class ISiteDeploymentRepository(ABC): storage_zone_region: str, pull_zone_id: int, pull_zone_bcdn_hostname: str, - custom_hostname: Optional[str] = None + custom_hostname: Optional[str] = None, + storage_provider: Optional[str] = None, + s3_bucket_name: Optional[str] = None, + s3_bucket_region: Optional[str] = None, + s3_custom_domain: Optional[str] = None, + s3_endpoint_url: Optional[str] = None ) -> SiteDeployment: """Create a new site deployment""" pass diff --git a/src/database/models.py b/src/database/models.py index 69cefd2..94550d9 100644 --- a/src/database/models.py +++ b/src/database/models.py @@ -38,12 +38,13 @@ class User(Base): class SiteDeployment(Base): - """Site deployment model for bunny.net infrastructure tracking""" + """Site deployment model for multi-cloud storage infrastructure tracking""" __tablename__ = "site_deployments" id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) site_name: Mapped[str] = mapped_column(String(255), nullable=False) custom_hostname: Mapped[Optional[str]] = mapped_column(String(255), unique=True, nullable=True, 
index=True) + storage_provider: Mapped[str] = mapped_column(String(20), nullable=False, default="bunny", index=True) storage_zone_id: Mapped[int] = mapped_column(Integer, nullable=False) storage_zone_name: Mapped[str] = mapped_column(String(255), nullable=False) storage_zone_password: Mapped[str] = mapped_column(String(255), nullable=False) @@ -51,6 +52,10 @@ class SiteDeployment(Base): pull_zone_id: Mapped[int] = mapped_column(Integer, nullable=False) pull_zone_bcdn_hostname: Mapped[str] = mapped_column(String(255), unique=True, nullable=False) template_name: Mapped[str] = mapped_column(String(50), default="basic", nullable=False) + s3_bucket_name: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + s3_bucket_region: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + s3_custom_domain: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + s3_endpoint_url: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow, nullable=False) updated_at: Mapped[datetime] = mapped_column( DateTime, @@ -61,7 +66,7 @@ class SiteDeployment(Base): def __repr__(self) -> str: hostname = self.custom_hostname or self.pull_zone_bcdn_hostname - return f"" + return f"" class Project(Base): diff --git a/src/database/repositories.py b/src/database/repositories.py index f4fb9cc..9b87dbc 100644 --- a/src/database/repositories.py +++ b/src/database/repositories.py @@ -143,7 +143,12 @@ class SiteDeploymentRepository(ISiteDeploymentRepository): storage_zone_region: str, pull_zone_id: int, pull_zone_bcdn_hostname: str, - custom_hostname: Optional[str] = None + custom_hostname: Optional[str] = None, + storage_provider: Optional[str] = None, + s3_bucket_name: Optional[str] = None, + s3_bucket_region: Optional[str] = None, + s3_custom_domain: Optional[str] = None, + s3_endpoint_url: Optional[str] = None ) -> SiteDeployment: """ Create a new site deployment @@ -157,6 
+162,11 @@ class SiteDeploymentRepository(ISiteDeploymentRepository): pull_zone_id: bunny.net Pull Zone ID pull_zone_bcdn_hostname: Default b-cdn.net hostname custom_hostname: Optional custom FQDN (e.g., www.yourdomain.com) + storage_provider: Storage provider type ('bunny', 's3', 's3_compatible'). Defaults to 'bunny' + s3_bucket_name: S3 bucket name (for S3 providers) + s3_bucket_region: S3 bucket region (for S3 providers) + s3_custom_domain: Custom domain for S3 (optional) + s3_endpoint_url: Custom endpoint URL for S3-compatible services (optional) Returns: The created SiteDeployment object @@ -167,12 +177,17 @@ class SiteDeploymentRepository(ISiteDeploymentRepository): deployment = SiteDeployment( site_name=site_name, custom_hostname=custom_hostname, + storage_provider=storage_provider or "bunny", storage_zone_id=storage_zone_id, storage_zone_name=storage_zone_name, storage_zone_password=storage_zone_password, storage_zone_region=storage_zone_region, pull_zone_id=pull_zone_id, - pull_zone_bcdn_hostname=pull_zone_bcdn_hostname + pull_zone_bcdn_hostname=pull_zone_bcdn_hostname, + s3_bucket_name=s3_bucket_name, + s3_bucket_region=s3_bucket_region, + s3_custom_domain=s3_custom_domain, + s3_endpoint_url=s3_endpoint_url ) try: diff --git a/src/generation/job_config.py b/src/generation/job_config.py index dbaa015..aa8e44e 100644 --- a/src/generation/job_config.py +++ b/src/generation/job_config.py @@ -46,8 +46,13 @@ class ModelConfig: @dataclass class AnchorTextConfig: """Anchor text configuration for interlinking""" - mode: str # "default", "override", "append" + mode: str # "default", "override", "append", "explicit" custom_text: Optional[List[str]] = None + tier1: Optional[List[str]] = None + tier2: Optional[List[str]] = None + tier3: Optional[List[str]] = None + tier4_plus: Optional[List[str]] = None + terms: Optional[List[str]] = None # For tier-level explicit config @dataclass @@ -262,12 +267,45 @@ class JobConfig: if "mode" not in anchor_text_data: raise 
ValueError("'anchor_text_config' must have 'mode' field") mode = anchor_text_data["mode"] - if mode not in ["default", "override", "append"]: - raise ValueError("'anchor_text_config' mode must be 'default', 'override', or 'append'") + if mode not in ["default", "override", "append", "explicit"]: + raise ValueError("'anchor_text_config' mode must be 'default', 'override', 'append', or 'explicit'") + + # Validate explicit mode requires tier-specific terms + if mode == "explicit": + has_tier_terms = any( + anchor_text_data.get(tier_key) is not None + for tier_key in ["tier1", "tier2", "tier3", "tier4_plus"] + ) + if not has_tier_terms: + raise ValueError("'anchor_text_config' with mode 'explicit' must have at least one tier-specific term list (tier1, tier2, tier3, or tier4_plus)") + custom_text = anchor_text_data.get("custom_text") if custom_text is not None and not isinstance(custom_text, list): raise ValueError("'anchor_text_config' custom_text must be an array") - anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text) + + # Parse tier-specific terms for explicit mode + tier1_terms = anchor_text_data.get("tier1") + tier2_terms = anchor_text_data.get("tier2") + tier3_terms = anchor_text_data.get("tier3") + tier4_plus_terms = anchor_text_data.get("tier4_plus") + + if tier1_terms is not None and not isinstance(tier1_terms, list): + raise ValueError("'anchor_text_config' tier1 must be an array") + if tier2_terms is not None and not isinstance(tier2_terms, list): + raise ValueError("'anchor_text_config' tier2 must be an array") + if tier3_terms is not None and not isinstance(tier3_terms, list): + raise ValueError("'anchor_text_config' tier3 must be an array") + if tier4_plus_terms is not None and not isinstance(tier4_plus_terms, list): + raise ValueError("'anchor_text_config' tier4_plus must be an array") + + anchor_text_config = AnchorTextConfig( + mode=mode, + custom_text=custom_text, + tier1=tier1_terms, + tier2=tier2_terms, + tier3=tier3_terms, + 
tier4_plus=tier4_plus_terms + ) # Parse failure configuration failure_config = None @@ -358,12 +396,24 @@ class JobConfig: if "mode" not in anchor_text_data: raise ValueError(f"'{tier_name}.anchor_text_config' must have 'mode' field") mode = anchor_text_data["mode"] - if mode not in ["default", "override", "append"]: - raise ValueError(f"'{tier_name}.anchor_text_config' mode must be 'default', 'override', or 'append'") + if mode not in ["default", "override", "append", "explicit"]: + raise ValueError(f"'{tier_name}.anchor_text_config' mode must be 'default', 'override', 'append', or 'explicit'") + + # Validate explicit mode requires terms + if mode == "explicit": + terms = anchor_text_data.get("terms") + if not terms or not isinstance(terms, list): + raise ValueError(f"'{tier_name}.anchor_text_config' with mode 'explicit' must have 'terms' array") + custom_text = anchor_text_data.get("custom_text") if custom_text is not None and not isinstance(custom_text, list): raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array") - anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text) + + terms = anchor_text_data.get("terms") + if terms is not None and not isinstance(terms, list): + raise ValueError(f"'{tier_name}.anchor_text_config' terms must be an array") + + anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text, terms=terms) # Parse tier-level models if present tier_models = None @@ -462,12 +512,24 @@ class JobConfig: if "mode" not in anchor_text_data: raise ValueError(f"'{tier_name}.anchor_text_config' must have 'mode' field") mode = anchor_text_data["mode"] - if mode not in ["default", "override", "append"]: - raise ValueError(f"'{tier_name}.anchor_text_config' mode must be 'default', 'override', or 'append'") + if mode not in ["default", "override", "append", "explicit"]: + raise ValueError(f"'{tier_name}.anchor_text_config' mode must be 'default', 'override', 'append', or 'explicit'") + + # Validate 
explicit mode requires terms + if mode == "explicit": + terms = anchor_text_data.get("terms") + if not terms or not isinstance(terms, list): + raise ValueError(f"'{tier_name}.anchor_text_config' with mode 'explicit' must have 'terms' array") + custom_text = anchor_text_data.get("custom_text") if custom_text is not None and not isinstance(custom_text, list): raise ValueError(f"'{tier_name}.anchor_text_config' custom_text must be an array") - anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text) + + terms = anchor_text_data.get("terms") + if terms is not None and not isinstance(terms, list): + raise ValueError(f"'{tier_name}.anchor_text_config' terms must be an array") + + anchor_text_config = AnchorTextConfig(mode=mode, custom_text=custom_text, terms=terms) # Parse image_config if present (same logic as _parse_tier) image_config = None diff --git a/src/interlinking/content_injection.py b/src/interlinking/content_injection.py index 7cc6d4c..48d2c69 100644 --- a/src/interlinking/content_injection.py +++ b/src/interlinking/content_injection.py @@ -302,6 +302,27 @@ def _get_anchor_texts_for_tier( mode = anchor_text_config.get('mode') if isinstance(anchor_text_config, dict) else getattr(anchor_text_config, 'mode', None) custom_text = anchor_text_config.get('custom_text') if isinstance(anchor_text_config, dict) else getattr(anchor_text_config, 'custom_text', None) + # Handle explicit mode - prioritize explicit terms + if mode == "explicit": + # Check for tier-level explicit terms first + if hasattr(anchor_text_config, 'terms') and anchor_text_config.terms: + return anchor_text_config.terms + + # Check for job-level tier-specific terms + tier_attr = getattr(anchor_text_config, tier, None) + if tier_attr: + return tier_attr + + # Fallback: check if it's a dict with tier key + if isinstance(anchor_text_config, dict): + tier_terms = anchor_text_config.get(tier) + if tier_terms: + return tier_terms + + # If explicit mode but no terms found, fall back to default_anchors instead of an empty 
list (shouldn't happen due to validation) + logger.warning(f"Explicit mode specified for {tier} but no terms found, falling back to defaults") + return default_anchors + if mode == "override" and custom_text: return custom_text elif mode == "append" and custom_text: diff --git a/tests/integration/test_content_injection_integration.py b/tests/integration/test_content_injection_integration.py index f1ae366..6bdc409 100644 --- a/tests/integration/test_content_injection_integration.py +++ b/tests/integration/test_content_injection_integration.py @@ -4,6 +4,7 @@ Tests full flow with database """ import pytest +from unittest.mock import Mock from sqlalchemy import create_engine from sqlalchemy.orm import sessionmaker from src.database.models import Base, User, Project, SiteDeployment, GeneratedContent, ArticleLink @@ -344,6 +345,89 @@ class TestAnchorTextConfigOverrides: db_session.refresh(content) assert '