Fixed the branded anchor text calculation logic used during import

main
PeninsulaInd 2026-01-17 14:28:53 -06:00
parent 6e2977c500
commit 3210dc5739
1 changed file with 45 additions and 17 deletions

View File

@ -1,7 +1,7 @@
""" """
CLI command definitions using Click CLI command definitions using Click
""" """
import random
import click import click
from typing import Optional from typing import Optional
from src.core.config import get_config, get_bunny_account_api_key, get_concurrent_workers from src.core.config import get_config, get_bunny_account_api_key, get_concurrent_workers
@ -42,7 +42,8 @@ def create_job_file_for_project(
project_name: str, project_name: str,
session, session,
tier1_branded_ratio: Optional[float] = None, tier1_branded_ratio: Optional[float] = None,
tier1_branded_text: Optional[str] = None tier1_branded_text: Optional[str] = None,
random_deployment_targets: Optional[int] = None
) -> Optional[str]: ) -> Optional[str]:
""" """
Create a job JSON file for a newly created project. Create a job JSON file for a newly created project.
@ -53,6 +54,7 @@ def create_job_file_for_project(
session: Database session session: Database session
tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0) tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0)
tier1_branded_text: Optional branded anchor text (company name) for tier1 tier1_branded_text: Optional branded anchor text (company name) for tier1
random_deployment_targets: Optional number of random deployment targets to select (default: random 2-3)
Returns: Returns:
Path to created file, or None if creation failed Path to created file, or None if creation failed
@ -73,6 +75,9 @@ def create_job_file_for_project(
t1_count = random.randint(10, 12) t1_count = random.randint(10, 12)
t2_count = random.randint(30, 45) t2_count = random.randint(30, 45)
if random_deployment_targets is not None:
num_targets = min(random_deployment_targets, len(available_domains))
else:
num_targets = min(random.randint(2, 3), len(available_domains)) num_targets = min(random.randint(2, 3), len(available_domains))
selected_domains = random.sample(available_domains, num_targets) selected_domains = random.sample(available_domains, num_targets)
@ -109,19 +114,38 @@ def create_job_file_for_project(
project = project_repo.get_by_id(project_id) project = project_repo.get_by_id(project_id)
if project and project.main_keyword: if project and project.main_keyword:
# Generate keyword variations for non-branded terms # First, get the actual available anchor text terms
# Prefer custom anchor text from CORA, then related searches; otherwise generate keyword variations
if project.custom_anchor_text and len(project.custom_anchor_text) > 0:
keyword_variations = project.custom_anchor_text
elif project.related_searches and len(project.related_searches) > 0:
keyword_variations = project.related_searches
else:
anchor_generator = AnchorTextGenerator() anchor_generator = AnchorTextGenerator()
keyword_variations = anchor_generator._generate_from_keyword(project, 10) keyword_variations = anchor_generator._generate_from_keyword(project, 10)
# Calculate term distribution (use 20 terms for good distribution) # Use the ACTUAL count of available terms
total_terms = 20 actual_count = len(keyword_variations)
branded_count = int(total_terms * tier1_branded_ratio)
keyword_count = total_terms - branded_count
# Create anchor text list with branded terms and keyword variations # Calculate branded and keyword counts based on actual available terms
anchor_terms = [tier1_branded_text] * branded_count branded_count = int(actual_count * tier1_branded_ratio)
anchor_terms.extend(keyword_variations[:keyword_count]) keyword_count = actual_count - branded_count
# Parse comma-separated branded anchor texts
branded_texts = [text.strip() for text in tier1_branded_text.split(',') if text.strip()]
# Create anchor text list with branded terms (cycling through multiple if provided) and custom anchor text from CORA
anchor_terms = []
for i in range(branded_count):
branded_text = branded_texts[i % len(branded_texts)] # Cycle through branded texts
anchor_terms.append(branded_text)
# Randomize keyword selection if we're not using all available terms
if keyword_count < actual_count:
selected_keywords = random.sample(keyword_variations, keyword_count)
else:
selected_keywords = keyword_variations
anchor_terms.extend(selected_keywords)
tier1_config["anchor_text_config"] = { tier1_config["anchor_text_config"] = {
"mode": "explicit", "mode": "explicit",
"terms": anchor_terms "terms": anchor_terms
@ -980,10 +1004,11 @@ def sync_sites(admin_user: Optional[str], admin_password: Optional[str], dry_run
@click.option('--name', '-n', required=True, help='Project name') @click.option('--name', '-n', required=True, help='Project name')
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)') @click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
@click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)') @click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)')
@click.option('--tier1-branded-ratio', default=0.75, type=float, help='Ratio of branded anchor text for tier1 (default: 0.75)') @click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--random-deployment-targets', '-r', type=int, help='Number of random deployment targets to select (default: random 2-3)')
@click.option('--username', '-u', help='Username for authentication') @click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-p', help='Password for authentication') @click.option('--password', '-p', help='Password for authentication')
def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], tier1_branded_ratio: float, username: Optional[str], password: Optional[str]): def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], tier1_branded_ratio: float, random_deployment_targets: Optional[int], username: Optional[str], password: Optional[str]):
"""Ingest a CORA .xlsx report and create a new project""" """Ingest a CORA .xlsx report and create a new project"""
try: try:
if not username or not password: if not username or not password:
@ -1056,7 +1081,7 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom
tier1_branded_text = None tier1_branded_text = None
if tier1_branded_ratio is not None and tier1_branded_ratio > 0: if tier1_branded_ratio is not None and tier1_branded_ratio > 0:
tier1_branded_text = click.prompt( tier1_branded_text = click.prompt(
"\nEnter branded anchor text (company name) for tier1", "\nEnter branded anchor text (company name) for tier1 (comma-separated for multiple, e.g., 'AGI Fabricators, AGI')",
type=str type=str
).strip() ).strip()
if not tier1_branded_text: if not tier1_branded_text:
@ -1069,7 +1094,8 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom
project.name, project.name,
session, session,
tier1_branded_ratio=tier1_branded_ratio, tier1_branded_ratio=tier1_branded_ratio,
tier1_branded_text=tier1_branded_text tier1_branded_text=tier1_branded_text,
random_deployment_targets=random_deployment_targets
) )
if job_file: if job_file:
click.echo(f"Job file created: {job_file}") click.echo(f"Job file created: {job_file}")
@ -1254,6 +1280,7 @@ def list_projects(username: Optional[str], password: Optional[str]):
@click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)') @click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)')
@click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)') @click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)')
@click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)') @click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)')
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)') @click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)')
@click.option('--username', '-u', help='Username for authentication') @click.option('--username', '-u', help='Username for authentication')
@click.option('--password', '-pwd', help='Password for authentication') @click.option('--password', '-pwd', help='Password for authentication')
@ -1262,6 +1289,7 @@ def create_job(
deployment_targets: tuple, deployment_targets: tuple,
tier1_count: int, tier1_count: int,
tier2_count: int, tier2_count: int,
tier1_branded_ratio: Optional[float],
output: Optional[str], output: Optional[str],
username: Optional[str], username: Optional[str],
password: Optional[str] password: Optional[str]