fixed branded anchor text import calculation logic
parent
6e2977c500
commit
3210dc5739
|
|
@ -1,7 +1,7 @@
|
|||
"""
|
||||
CLI command definitions using Click
|
||||
"""
|
||||
|
||||
import random
|
||||
import click
|
||||
from typing import Optional
|
||||
from src.core.config import get_config, get_bunny_account_api_key, get_concurrent_workers
|
||||
|
|
@ -42,7 +42,8 @@ def create_job_file_for_project(
|
|||
project_name: str,
|
||||
session,
|
||||
tier1_branded_ratio: Optional[float] = None,
|
||||
tier1_branded_text: Optional[str] = None
|
||||
tier1_branded_text: Optional[str] = None,
|
||||
random_deployment_targets: Optional[int] = None
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Create a job JSON file for a newly created project.
|
||||
|
|
@ -53,6 +54,7 @@ def create_job_file_for_project(
|
|||
session: Database session
|
||||
tier1_branded_ratio: Optional ratio of branded anchor text for tier1 (0.0-1.0)
|
||||
tier1_branded_text: Optional branded anchor text (company name) for tier1
|
||||
random_deployment_targets: Optional number of random deployment targets to select (default: random 2-3)
|
||||
|
||||
Returns:
|
||||
Path to created file, or None if creation failed
|
||||
|
|
@ -73,7 +75,10 @@ def create_job_file_for_project(
|
|||
|
||||
t1_count = random.randint(10, 12)
|
||||
t2_count = random.randint(30, 45)
|
||||
num_targets = min(random.randint(2, 3), len(available_domains))
|
||||
if random_deployment_targets is not None:
|
||||
num_targets = min(random_deployment_targets, len(available_domains))
|
||||
else:
|
||||
num_targets = min(random.randint(2, 3), len(available_domains))
|
||||
selected_domains = random.sample(available_domains, num_targets)
|
||||
|
||||
sanitized_name = "".join(c if c.isalnum() or c in ('-', '_') else '-' for c in project_name.lower()).strip('-')
|
||||
|
|
@ -109,19 +114,38 @@ def create_job_file_for_project(
|
|||
project = project_repo.get_by_id(project_id)
|
||||
|
||||
if project and project.main_keyword:
|
||||
# Generate keyword variations for non-branded terms
|
||||
anchor_generator = AnchorTextGenerator()
|
||||
keyword_variations = anchor_generator._generate_from_keyword(project, 10)
|
||||
# First, get the actual available anchor text terms
|
||||
|
||||
# Calculate term distribution (use 20 terms for good distribution)
|
||||
total_terms = 20
|
||||
branded_count = int(total_terms * tier1_branded_ratio)
|
||||
keyword_count = total_terms - branded_count
|
||||
# Use custom anchor text from CORA if available, otherwise generate keyword variations
|
||||
if project.custom_anchor_text and len(project.custom_anchor_text) > 0:
|
||||
keyword_variations = project.custom_anchor_text
|
||||
elif project.related_searches and len(project.related_searches) > 0:
|
||||
keyword_variations = project.related_searches
|
||||
else:
|
||||
anchor_generator = AnchorTextGenerator()
|
||||
keyword_variations = anchor_generator._generate_from_keyword(project, 10)
|
||||
|
||||
# Create anchor text list with branded terms and keyword variations
|
||||
anchor_terms = [tier1_branded_text] * branded_count
|
||||
anchor_terms.extend(keyword_variations[:keyword_count])
|
||||
# Use the ACTUAL count of available terms
|
||||
actual_count = len(keyword_variations)
|
||||
|
||||
# Calculate branded and keyword counts based on actual available terms
|
||||
branded_count = int(actual_count * tier1_branded_ratio)
|
||||
keyword_count = actual_count - branded_count
|
||||
|
||||
# Parse comma-separated branded anchor texts
|
||||
branded_texts = [text.strip() for text in tier1_branded_text.split(',') if text.strip()]
|
||||
|
||||
# Create anchor text list with branded terms (cycling through multiple if provided) and custom anchor text from CORA
|
||||
anchor_terms = []
|
||||
for i in range(branded_count):
|
||||
branded_text = branded_texts[i % len(branded_texts)] # Cycle through branded texts
|
||||
anchor_terms.append(branded_text)
|
||||
# Randomize keyword selection if we're not using all available terms
|
||||
if keyword_count < actual_count:
|
||||
selected_keywords = random.sample(keyword_variations, keyword_count)
|
||||
else:
|
||||
selected_keywords = keyword_variations
|
||||
anchor_terms.extend(selected_keywords)
|
||||
tier1_config["anchor_text_config"] = {
|
||||
"mode": "explicit",
|
||||
"terms": anchor_terms
|
||||
|
|
@ -980,10 +1004,11 @@ def sync_sites(admin_user: Optional[str], admin_password: Optional[str], dry_run
|
|||
@click.option('--name', '-n', required=True, help='Project name')
|
||||
@click.option('--money-site-url', '-m', help='Money site URL (e.g., https://example.com)')
|
||||
@click.option('--custom-anchors', '-a', help='Comma-separated list of custom anchor text (optional)')
|
||||
@click.option('--tier1-branded-ratio', default=0.75, type=float, help='Ratio of branded anchor text for tier1 (default: 0.75)')
|
||||
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
|
||||
@click.option('--random-deployment-targets', '-r', type=int, help='Number of random deployment targets to select (default: random 2-3)')
|
||||
@click.option('--username', '-u', help='Username for authentication')
|
||||
@click.option('--password', '-p', help='Password for authentication')
|
||||
def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], tier1_branded_ratio: float, username: Optional[str], password: Optional[str]):
|
||||
def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom_anchors: Optional[str], tier1_branded_ratio: float, random_deployment_targets: Optional[int], username: Optional[str], password: Optional[str]):
|
||||
"""Ingest a CORA .xlsx report and create a new project"""
|
||||
try:
|
||||
if not username or not password:
|
||||
|
|
@ -1056,7 +1081,7 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom
|
|||
tier1_branded_text = None
|
||||
if tier1_branded_ratio is not None and tier1_branded_ratio > 0:
|
||||
tier1_branded_text = click.prompt(
|
||||
"\nEnter branded anchor text (company name) for tier1",
|
||||
"\nEnter branded anchor text (company name) for tier1 (comma-separated for multiple, e.g., 'AGI Fabricators, AGI')",
|
||||
type=str
|
||||
).strip()
|
||||
if not tier1_branded_text:
|
||||
|
|
@ -1069,7 +1094,8 @@ def ingest_cora(file_path: str, name: str, money_site_url: Optional[str], custom
|
|||
project.name,
|
||||
session,
|
||||
tier1_branded_ratio=tier1_branded_ratio,
|
||||
tier1_branded_text=tier1_branded_text
|
||||
tier1_branded_text=tier1_branded_text,
|
||||
random_deployment_targets=random_deployment_targets
|
||||
)
|
||||
if job_file:
|
||||
click.echo(f"Job file created: {job_file}")
|
||||
|
|
@ -1254,6 +1280,7 @@ def list_projects(username: Optional[str], password: Optional[str]):
|
|||
@click.option('--deployment-targets', '-d', multiple=True, help='Deployment target hostnames (can specify multiple times)')
|
||||
@click.option('--tier1-count', default=10, type=int, help='Number of tier1 articles (default: 10)')
|
||||
@click.option('--tier2-count', default=30, type=int, help='Number of tier2 articles (default: 30)')
|
||||
@click.option('--tier1-branded-ratio', '-t', default=None, type=float, help='Ratio of branded anchor text for tier1 (optional, only prompts if provided)')
|
||||
@click.option('--output', '-o', type=click.Path(), help='Output file path (default: jobs/{project_name}.json)')
|
||||
@click.option('--username', '-u', help='Username for authentication')
|
||||
@click.option('--password', '-pwd', help='Password for authentication')
|
||||
|
|
@ -1262,6 +1289,7 @@ def create_job(
|
|||
deployment_targets: tuple,
|
||||
tier1_count: int,
|
||||
tier2_count: int,
|
||||
tier1_branded_ratio: Optional[float],
|
||||
output: Optional[str],
|
||||
username: Optional[str],
|
||||
password: Optional[str]
|
||||
|
|
|
|||
Loading…
Reference in New Issue