Story 2.3 - content generation script nightmare almost done - pre-fix outline too big issue

main
PeninsulaInd 2025-10-19 20:29:13 -05:00
parent f73b0700ef
commit b6b0acfcb0
32 changed files with 1162 additions and 82 deletions

View File

@ -0,0 +1,150 @@
#!/usr/bin/env python3
"""
Helper script to create job configuration files programmatically
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from generation.job_config import JobConfig, TierConfig, ModelConfig, AnchorTextConfig, FailureConfig, InterlinkingConfig
def create_tier1_job(project_id: int, article_count: int = 15):
    """
    Build a single-tier (Tier 1) job configuration.

    Args:
        project_id: Project ID the job is attached to
        article_count: Number of Tier 1 articles to request (default 15)

    Returns:
        A JobConfig holding one TierConfig (tier=1) plus the failure and
        interlinking settings hard-coded below.
    """
    # Tier 1 uses the mini model for titles and Claude 3.5 Sonnet for the
    # outline and the article content.
    tier1_models = ModelConfig(
        title="openai/gpt-4o-mini",
        outline="anthropic/claude-3.5-sonnet",
        content="anthropic/claude-3.5-sonnet",
    )
    only_tier = TierConfig(
        tier=1,
        article_count=article_count,
        models=tier1_models,
        anchor_text_config=AnchorTextConfig(mode="default"),
        validation_attempts=3,
    )

    failure_policy = FailureConfig(max_consecutive_failures=5, skip_on_failure=True)
    link_policy = InterlinkingConfig(
        links_per_article_min=2,
        links_per_article_max=4,
        include_home_link=True,
    )

    return JobConfig(
        job_name=f"Tier 1 Launch - Project {project_id}",
        project_id=project_id,
        description=f"Generate {article_count} high-quality Tier 1 articles",
        tiers=[only_tier],
        failure_config=failure_policy,
        interlinking=link_policy,
    )
def create_multi_tier_job(project_id: int, tier_counts: list):
    """
    Build a job configuration containing one tier per entry of tier_counts.

    Tiers are numbered from 1 in list order. Tier 1 gets the higher-quality
    outline/content model and 3 validation attempts; every later tier uses
    the cheaper model throughout and 2 validation attempts.

    Args:
        project_id: Project ID
        tier_counts: List of article counts for each tier, e.g. [15, 50, 100]

    Returns:
        A JobConfig with the generated tiers and the failure/interlinking
        settings hard-coded below.
    """
    cheap_model = "openai/gpt-4o-mini"
    premium_model = "anthropic/claude-3.5-sonnet"

    tiers = []
    for position, count in enumerate(tier_counts, start=1):
        is_top_tier = position == 1
        # Only the first tier pays for the premium model and the extra
        # validation attempt; titles always use the cheap model.
        body_model = premium_model if is_top_tier else cheap_model
        tiers.append(
            TierConfig(
                tier=position,
                article_count=count,
                models=ModelConfig(
                    title=cheap_model,
                    outline=body_model,
                    content=body_model,
                ),
                validation_attempts=3 if is_top_tier else 2,
            )
        )

    total_articles = sum(tier_counts)
    summary = f"Site build with {total_articles} articles across {len(tier_counts)} tiers"

    return JobConfig(
        job_name=f"Multi-Tier Build - Project {project_id}",
        project_id=project_id,
        description=summary,
        tiers=tiers,
        failure_config=FailureConfig(
            max_consecutive_failures=10,
            skip_on_failure=True,
        ),
        interlinking=InterlinkingConfig(
            links_per_article_min=2,
            links_per_article_max=4,
            include_home_link=True,
        ),
    )
def _parse_int_arg(raw: str, label: str) -> int:
    """Convert one CLI argument to int, exiting with status 1 and a readable
    message (instead of a ValueError traceback) when it is not an integer."""
    try:
        return int(raw)
    except ValueError:
        print(f"Error: {label} must be an integer, got {raw!r}")
        sys.exit(1)


def main(argv=None):
    """
    Command-line entry point: build a job configuration and save it.

    Args:
        argv: Full argument vector including the program name at index 0.
            Defaults to sys.argv when omitted.

    Exits with status 1 (after printing usage or an error) when arguments
    are missing, are not integers where integers are required, or name an
    unknown job type.
    """
    args = sys.argv if argv is None else argv

    if len(args) < 3:
        print("Usage:")
        print("  python create_job_config.py <project_id> tier1 <article_count>")
        print("  python create_job_config.py <project_id> multi <tier1_count> <tier2_count> [tier3_count] ...")
        print("\nExamples:")
        print("  python create_job_config.py 1 tier1 15")
        print("  python create_job_config.py 1 multi 15 50 100")
        print("  python create_job_config.py 1 multi 10 25")
        sys.exit(1)

    project_id = _parse_int_arg(args[1], "project_id")
    job_type = args[2]

    if job_type == "tier1":
        if len(args) < 4:
            print("Error: tier1 requires article_count")
            print("Example: python create_job_config.py 1 tier1 15")
            sys.exit(1)
        article_count = _parse_int_arg(args[3], "article_count")
        job = create_tier1_job(project_id, article_count)
        filename = f"jobs/project_{project_id}_tier1_{article_count}articles.json"
    elif job_type == "multi":
        if len(args) < 4:
            print("Error: multi requires at least one tier count")
            print("Example: python create_job_config.py 1 multi 15 50 100")
            sys.exit(1)
        # Every remaining argument is the article count of the next tier.
        tier_counts = [_parse_int_arg(count, "tier count") for count in args[3:]]
        job = create_multi_tier_job(project_id, tier_counts)
        total = sum(tier_counts)
        filename = f"jobs/project_{project_id}_multi_{len(tier_counts)}tiers_{total}articles.json"
    else:
        print(f"Unknown job type: {job_type}")
        print("Use 'tier1' or 'multi'")
        sys.exit(1)

    # NOTE(review): to_file is assumed to serialise the config to `filename`;
    # whether it creates the jobs/ directory is not visible here - confirm.
    job.to_file(filename)

    print(f"Created job configuration: {filename}")
    print("\nJob details:")
    print(f"  Project ID: {project_id}")
    print(f"  Total articles: {job.get_total_articles()}")
    if job_type == "multi":
        for tier in job.tiers:
            print(f"  Tier {tier.tier}: {tier.article_count} articles")
    print("\nTo run this job:")
    print(f"  uv run python main.py generate-batch -j {filename} -u admin -p yourpassword")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1 @@
"Mastering Shaft Machining: Essential Techniques for Precision CNC Turning and Broaching"

View File

@ -0,0 +1,7 @@
{
"stage": "title",
"attempt": 1,
"is_valid": true,
"errors": [],
"title": "\"Mastering Shaft Machining: Essential Techniques for Precision CNC Turning and Broaching\""
}

View File

@ -0,0 +1,46 @@
{
"sections": [
{
"h2": "Introduction to shaft machining",
"h3s": [
"What is shaft machining?",
"Importance of precision in shaft machining",
"Common applications of shaft machining"
]
},
{
"h2": "Essential techniques for CNC turning",
"h3s": [
"Overview of CNC turning process",
"Choosing the right CNC machine for shaft machining",
"Setting up CNC turning parameters"
]
},
{
"h2": "Broaching techniques for effective machining",
"h3s": [
"What is broaching in machining?",
"Types of broaching tools and their uses",
"Best practices for broaching shafts"
]
},
{
"h2": "Understanding machining processes and components",
"h3s": [
"Comparison of CNC milling and turning",
"Role of materials like stainless steel and brass",
"How torque affects shaft machining",
"Importance of specifications in shaft design",
"Utilizing automated machining for efficiency"
]
},
{
"h2": "Frequently Asked Questions",
"h3s": [
"What are the key differences between CNC turning and broaching?",
"How can I ensure precision in custom shaft machining?",
"What are common challenges in shaft machining and how can they be overcome?"
]
}
]
}

View File

@ -0,0 +1,53 @@
{
"stage": "outline",
"attempt": 1,
"is_valid": true,
"errors": [],
"missing": {},
"outline": {
"sections": [
{
"h2": "Introduction to shaft machining",
"h3s": [
"What is shaft machining?",
"Importance of precision in shaft machining",
"Common applications of shaft machining"
]
},
{
"h2": "Essential techniques for CNC turning",
"h3s": [
"Overview of CNC turning process",
"Choosing the right CNC machine for shaft machining",
"Setting up CNC turning parameters"
]
},
{
"h2": "Broaching techniques for effective machining",
"h3s": [
"What is broaching in machining?",
"Types of broaching tools and their uses",
"Best practices for broaching shafts"
]
},
{
"h2": "Understanding machining processes and components",
"h3s": [
"Comparison of CNC milling and turning",
"Role of materials like stainless steel and brass",
"How torque affects shaft machining",
"Importance of specifications in shaft design",
"Utilizing automated machining for efficiency"
]
},
{
"h2": "Frequently Asked Questions",
"h3s": [
"What are the key differences between CNC turning and broaching?",
"How can I ensure precision in custom shaft machining?",
"What are common challenges in shaft machining and how can they be overcome?"
]
}
]
}
}

View File

@ -0,0 +1,90 @@
<h1>Mastering Shaft Machining: Essential Techniques for Precision CNC Turning and Broaching</h1>
<h2>Introduction to shaft machining</h2>
<h3>What is shaft machining?</h3>
<p>Shaft machining represents a specialized manufacturing process that creates cylindrical components essential for transmitting torque and rotational motion across mechanical systems. This precision-driven technique employs various methods including CNC turning, milling, and broaching to transform raw materials like stainless steel, aluminum, and brass into functional drive shaft components. Modern shaft production integrates automated machining systems that maintain tight tolerance requirements while optimizing surface finish quality. The process encompasses everything from initial material selection through final surface finishing operations, ensuring each component meets exact specifications for diameter, length, and concentricity.</p>
<p>Manufacturing facilities utilize advanced CNC machining centers equipped with multi-axis capabilities to produce shafts ranging from miniature armature components to large industrial applications. The integration of computer-aided design with machining processes enables manufacturers to achieve accuracy levels measured in microns. Material removal occurs through controlled cutting operations where tooling interfaces with rotating workpieces, generating precise geometries that satisfy demanding engineering requirements. Supply chain management plays a crucial role in coordinating raw material procurement with production schedules to maintain consistent output quality.</p>
<h3>Importance of precision in shaft machining</h3>
<p>Precision shaft machining directly impacts equipment performance, operational longevity, and safety across industrial applications. Components manufactured outside specified tolerance ranges create excessive vibration, premature bearing failure, and catastrophic system breakdowns. Modern CNC turning operations achieve tolerances within ±0.0001 inches, ensuring proper fit and function within assemblies. The relationship between shaft dimensions and mating components requires meticulous attention to surface finish parameters, typically ranging from 16 to 63 microinches Ra depending on application requirements.</p>
<p>Heat generated during machining operations affects dimensional stability, making thermal management essential for maintaining accuracy throughout production runs. Manufacturers implement coolant systems and controlled cutting parameters to minimize thermal expansion that compromises finished dimensions. The precision achieved through proper machining techniques eliminates costly rework, reduces assembly time, and extends component service life. Quality control protocols incorporate coordinate measuring machines and laser scanning technology to verify conformance with engineering specifications before parts enter service.</p>
<h3>Common applications of shaft machining</h3>
<p>Shaft machining services support diverse industries including automotive, aerospace, medical devices, and industrial equipment manufacturing. Drive shaft production for automotive applications demands components capable of transmitting high torque loads while maintaining balance at elevated rotational speeds. Precision machining enables the creation of splined connections, keyways, and bearing journals that integrate seamlessly with transmission systems. Medical equipment manufacturers require ultra-precise shafts for surgical instruments and diagnostic devices where accuracy directly affects patient outcomes.</p>
<p>Industrial machinery relies on custom shaft machining to create components for pumps, compressors, and power transmission equipment. The aerospace sector demands shafts manufactured from specialized materials with documented traceability and rigorous inspection protocols. Metal stamping operations utilize precision shafts in die casting equipment where dimensional consistency ensures product quality. Each application presents unique challenges regarding material selection, surface treatment, and geometric complexity that experienced machining facilities address through tailored manufacturing approaches.</p>
<h2>Essential techniques for CNC turning</h2>
<h3>Overview of CNC turning process</h3>
<p>CNC turning employs computer-controlled lathes to rotate workpieces against stationary cutting tools, removing material to create cylindrical geometries. The process excels at producing shafts with circular cross-sections, offering superior efficiency compared to manual machining methods. Modern turning centers feature live tooling capabilities that enable secondary operations like drilling, boring, and milling without workpiece repositioning. Programming software translates CAD models into G-code instructions that control spindle speed, feed rate, and tool positioning throughout the machining cycle.</p>
<p>The turning process accommodates various materials including steel, stainless steel, brass, and aluminum, each requiring specific cutting parameters for optimal results. Tooling selection depends on material hardness, desired surface finish, and production volume requirements. High-speed machining techniques reduce cycle times while maintaining dimensional accuracy through advanced tool path strategies. Manufacturers leverage turning operations to create complex shaft geometries including tapers, grooves, and threaded sections in single setups, minimizing handling and improving concentricity.</p>
<h3>Choosing the right CNC machine for shaft machining</h3>
<p>Selecting appropriate CNC equipment requires analysis of part specifications, production volume, and material characteristics. Swiss-type lathes excel at producing small-diameter, high-precision shafts with length-to-diameter ratios exceeding 10:1. These machines provide exceptional rigidity through guide bushing support that minimizes deflection during cutting operations. Conventional turning centers suit larger shaft diameters and shorter lengths where tool access and rigidity pose fewer challenges. Multi-spindle configurations increase throughput for high-volume shaft production by machining multiple workpieces simultaneously.</p>
<p>Machine selection also considers automation requirements, with bar feeders and robotic loading systems enhancing productivity for repetitive shaft manufacturing. Spindle power and torque capacity must align with material removal rates necessary to maintain competitive cycle times. Precision ground ways, thermal stability features, and advanced control systems contribute to achieving tight tolerance requirements. Investment decisions balance initial equipment costs against long-term production efficiency, quality consistency, and maintenance requirements specific to shaft machining applications.</p>
<h3>Setting up CNC turning parameters</h3>
<p>Proper parameter configuration optimizes tool life, surface finish quality, and dimensional accuracy during shaft machining operations. Cutting speed selection depends on workpiece material properties, with stainless steel requiring lower speeds than aluminum to manage heat generation and prevent work hardening. Feed rates influence surface finish characteristics, with finer feeds producing smoother surfaces suitable for bearing mounting areas. Depth of cut balances material removal efficiency against tool stress and vibration control, typically ranging from 0.050 to 0.200 inches for roughing operations.</p>
<p>Coolant delivery strategies significantly impact machining performance, particularly when working with materials prone to heat-related dimensional changes. Through-spindle coolant systems direct high-pressure fluid directly to the cutting zone, improving chip evacuation and temperature control. Tool geometry selection including rake angle, clearance angle, and nose radius affects cutting forces and surface finish outcomes. Machinists verify setup accuracy using dial indicators and test cuts before committing to full production runs, ensuring all parameters produce components meeting specifications.</p>
<h2>Broaching techniques for effective machining</h2>
<h3>What is broaching in machining?</h3>
<p>Broaching represents a precision machining process that removes material using a multi-toothed cutting tool pulled or pushed across a workpiece surface. This technique creates complex internal and external profiles including keyways, splines, and hexagonal shapes on shaft surfaces with exceptional accuracy. Each successive tooth on the broaching tool removes a predetermined amount of material, progressively forming the desired geometry in a single pass. The process achieves superior surface finish and dimensional consistency compared to alternative methods like milling or shaping for specific geometric features.</p>
<p>Internal broaching machines create features within shaft bores, while surface broaching operations form external profiles on cylindrical components. The linear cutting action produces minimal distortion and maintains tight tolerance control, making broaching ideal for high-volume shaft production. Manufacturers employ broaching when geometric complexity, production volume, and quality requirements justify the specialized tooling investment. The technique accommodates various materials including steel, brass, and aluminum, though tool design must account for material-specific cutting characteristics.</p>
<h3>Types of broaching tools and their uses</h3>
<p>Keyway broaches create longitudinal slots in shafts for key-based torque transmission, available in various widths conforming to industry standards. Spline broaches form internal or external gear-like profiles that distribute torque loads across multiple contact points, enhancing power transmission capacity. Round broaches enlarge and finish internal diameters to precise dimensions with superior surface finish characteristics. Pot broaches machine external surfaces on shaft ends, creating hexagonal or square profiles for wrench engagement.</p>
<p>Tool material selection impacts broaching performance and longevity, with high-speed steel offering economical solutions for moderate production volumes. Carbide-tipped broaches extend tool life when machining abrasive materials or high-volume applications justify increased tooling costs. Surface broaches create flat surfaces or complex contours on external shaft geometries. Combination broaches integrate multiple cutting sections to produce compound profiles in single operations, reducing setup time and improving geometric relationships between features.</p>
<h3>Best practices for broaching shafts</h3>
<p>Successful broaching operations require proper workpiece fixturing that prevents movement during the cutting stroke while maintaining alignment with the tool path. Adequate lubrication reduces friction and heat generation, extending tool life and improving surface finish quality. Cutting speeds for broaching typically remain lower than turning or milling operations, with feed determined by the tooth pitch designed into the broach. Regular tool inspection identifies wear patterns before they compromise dimensional accuracy or surface quality.</p>
<p>Material preparation through proper heat treatment timing ensures consistent cutting forces and predictable tool wear. Broaching soft materials before hardening operations prevents excessive tool stress, while some applications require post-heat treatment broaching to achieve final dimensions. Chip management systems remove debris from the cutting zone, preventing recutting and surface damage. Manufacturers maintain detailed tooling records tracking broach usage, sharpening history, and dimensional performance to optimize replacement schedules and maintain quality consistency throughout production runs.</p>
<h2>Understanding machining processes and components</h2>
<h3>Comparison of CNC milling and turning</h3>
<p>CNC milling employs rotating cutting tools that traverse stationary workpieces, creating complex geometries including flats, pockets, and non-circular profiles on shaft surfaces. This process excels when shaft designs incorporate features like cross-holes, keyways, or mounting flats that deviate from purely cylindrical forms. Turning operations rotate the workpiece against stationary tools, optimizing efficiency for creating round shaft geometries with superior concentricity. The fundamental difference in material removal mechanics influences surface finish characteristics, with turning typically producing smoother finishes on cylindrical surfaces.</p>
<p>Machining components selection depends on geometric requirements, with many shaft designs incorporating both turned and milled features. Multi-tasking machines integrate turning and milling capabilities within single setups, maintaining precise geometric relationships between features while reducing handling. Turning operations generally achieve faster material removal rates for cylindrical shaft sections, while milling provides versatility for complex feature creation. Cost considerations favor turning for high-volume cylindrical shaft production, though milling becomes economical when geometric complexity justifies the process.</p>
<h3>Role of materials like stainless steel and brass</h3>
<p>Stainless steel shaft machining demands careful parameter selection due to material work hardening characteristics and lower thermal conductivity compared to carbon steels. The material's corrosion resistance makes it essential for food processing, medical, and marine applications where environmental exposure threatens component integrity. Machining stainless steel requires sharp tooling, positive rake angles, and adequate coolant delivery to manage heat and prevent work hardening that accelerates tool wear.</p>
<p>Brass offers excellent machinability, producing superior surface finishes with minimal tool wear and reduced cutting forces. The material's self-lubricating properties during machining enable higher cutting speeds and feeds compared to ferrous materials. Brass shafts serve applications requiring electrical conductivity, corrosion resistance, and aesthetic appeal. Material selection balances mechanical properties including tensile strength and hardness against machinability considerations and application-specific performance requirements. Aluminum provides lightweight alternatives with good machinability, though lower strength limits applications to reduced load scenarios.</p>
<h3>How torque affects shaft machining</h3>
<p>Torque transmission requirements directly influence shaft diameter, length, and feature design specifications that machining processes must achieve. Engineers calculate required shaft dimensions based on transmitted torque loads, applying safety factors that account for stress concentrations at geometric transitions. Machining operations must maintain dimensional accuracy in critical areas like bearing journals and coupling interfaces where torque transfer occurs. Surface finish specifications in torque-transmitting regions prevent fretting wear and ensure proper load distribution.</p>
<p>Keyway and spline geometries machined into shafts create mechanical interlocks that transmit torque between components. The precision of these features affects load distribution and component longevity under cyclic loading conditions. Machining-induced residual stresses influence fatigue resistance in high-torque applications, making process parameter selection and finishing operations critical for performance. Material removal strategies must avoid introducing stress concentrations that compromise shaft strength in service. Understanding torque requirements guides machining process selection, tooling choices, and quality control protocols throughout shaft production.</p>
<h3>Importance of specifications in shaft design</h3>
<p>Engineering specifications define critical dimensions, tolerances, surface finish requirements, and material properties that machining processes must achieve. Detailed drawings communicate geometric dimensioning and tolerancing (GD&T) callouts that control form, orientation, and location of shaft features. Specifications establish acceptance criteria for dimensional accuracy, surface roughness, and geometric relationships between features. Manufacturers reference these documents throughout production to ensure conformance and facilitate quality verification.</p>
<p>Material specifications identify alloy composition, heat treatment requirements, and mechanical property targets essential for application performance. Surface finish specifications define roughness parameters measured in microinches or microns, guiding machining parameter selection and finishing operations. Tolerance specifications balance functional requirements against manufacturing capability and cost implications. Comprehensive specifications enable consistent shaft production across multiple manufacturing facilities and support supply chain management by establishing clear quality expectations. Documentation provides traceability linking finished components to inspection records, material certifications, and process parameters.</p>
<h3>Utilizing automated machining for efficiency</h3>
<p>Automated machining systems integrate robotic loading, in-process measurement, and adaptive control to optimize shaft production efficiency. Bar feeding mechanisms supply raw material continuously to CNC turning centers, enabling lights-out manufacturing for extended production runs. Robotic cells transfer workpieces between machining operations, reducing labor costs while improving consistency and throughput. Automated tool changers swap cutting implements based on programmed sequences, minimizing non-productive time during complex shaft machining cycles.</p>
<p>In-process gauging systems measure critical dimensions during machining, enabling real-time tool offset adjustments that maintain tight tolerances despite tool wear. Automated systems generate production data supporting statistical process control initiatives that identify trends before they produce nonconforming parts. Integration with enterprise resource planning systems coordinates shaft manufacturing with inventory management and customer delivery schedules. The initial investment in automation infrastructure delivers returns through reduced labor costs, improved quality consistency, and increased production capacity. Manufacturers evaluate automation opportunities by analyzing production volumes, part complexity, and labor availability within their specific operational context.</p>
<h2>Frequently Asked Questions</h2>
<h3>What are the key differences between CNC turning and broaching?</h3>
<p>The key differences between CNC turning and broaching involve fundamental machining mechanics and optimal applications. CNC turning rotates the workpiece against stationary cutting tools to create cylindrical geometries, offering versatility for various shaft diameters and lengths with programmable control over dimensions. This process excels at producing round profiles, tapers, and threaded sections efficiently across diverse production volumes. Broaching employs multi-toothed linear cutting tools that form complex profiles like keyways and splines in single passes, achieving superior accuracy and surface finish for these specific geometries. While turning accommodates design changes through programming modifications, broaching requires dedicated tooling for each profile geometry, making it economical primarily for high-volume production. Turning operations provide flexibility across material types and shaft configurations, whereas broaching delivers unmatched precision and consistency for specific feature types. The processes complement each other in comprehensive shaft manufacturing, with turning creating base geometries and broaching adding specialized features requiring exceptional accuracy and repeatability.</p>
<h3>How can I ensure precision in custom shaft machining?</h3>
<p>Ensuring precision in custom shaft machining requires comprehensive attention to equipment capability, process control, and quality verification throughout production. Start by selecting CNC machines with adequate rigidity, thermal stability, and positioning accuracy to maintain tight tolerances during extended machining cycles. Implement detailed setup procedures including proper workholding, tool calibration, and proven parameter sets validated through test cuts before full production. Utilize statistical process control monitoring critical dimensions throughout production runs, enabling early detection of trends that might compromise accuracy. Maintain cutting tools through scheduled replacement or resharpening programs that prevent dimensional drift from tool wear. Control environmental factors including temperature fluctuations that affect material dimensions and machine performance. Employ coordinate measuring machines or laser scanning systems for comprehensive dimensional verification against specifications. Document all process parameters, inspection results, and material certifications to establish traceability and support continuous improvement initiatives. Partner with experienced machining services providers who demonstrate proven capability in similar shaft applications and maintain quality management systems certified to industry standards. Regular equipment maintenance and calibration sustain precision capability over time.</p>
<h3>What are common challenges in shaft machining and how can they be overcome?</h3>
<p>Common challenges in shaft machining include maintaining concentricity on long, slender components prone to deflection during cutting operations. Overcome this through tailstock support, steady rests, or specialized fixturing that minimizes workpiece deflection under cutting forces. Material-related challenges like work hardening in stainless steel require sharp tooling, optimized cutting parameters, and adequate coolant delivery to manage heat generation. Achieving specified surface finish on bearing journals demands careful attention to tool geometry, feed rates, and finishing passes that eliminate tool marks and chatter patterns. Thermal growth during machining affects dimensional

View File

@ -0,0 +1,13 @@
{
"passed": false,
"errors": [
{
"rule": "h2_entities",
"severity": "error",
"message": "H2 tags with entities below CORA target",
"expected": 7,
"actual": 4
}
],
"warnings": []
}

View File

@ -0,0 +1 @@
"Mastering Precision: A Comprehensive Guide to Shaft Machining Techniques for Optimal Performance"

View File

@ -0,0 +1,7 @@
{
"stage": "title",
"attempt": 1,
"is_valid": true,
"errors": [],
"title": "\"Mastering Precision: A Comprehensive Guide to Shaft Machining Techniques for Optimal Performance\""
}

View File

@ -0,0 +1,46 @@
{
"sections": [
{
"h2": "Introduction to shaft machining",
"h3s": [
"What is shaft machining?",
"Why is precision important in shaft machining?",
"Overview of common shaft machining processes"
]
},
{
"h2": "Key shaft machining techniques for optimal performance",
"h3s": [
"CNC turning: An overview of the process",
"Milling techniques in shaft machining",
"Boring for precision hole-making",
"Broaching: A method for complex shapes",
"Understanding die casting in shaft fabrication"
]
},
{
"h2": "Materials used in shaft machining",
"h3s": [
"Stainless steel: Benefits and applications",
"Brass in shaft machining: Characteristics and uses",
"Choosing the right steel for your project"
]
},
{
"h2": "Challenges in shaft machining and solutions",
"h3s": [
"How to address machining tolerances?",
"Dealing with torque and its impact on shaft design",
"Optimizing the supply chain for machining services"
]
},
{
"h2": "Frequently Asked Questions",
"h3s": [
"What are the common machining processes for shafts?",
"How can custom shaft machining meet specific project needs?",
"What factors influence the cost of shaft machining services?"
]
}
]
}

View File

@ -0,0 +1,53 @@
{
"stage": "outline",
"attempt": 1,
"is_valid": true,
"errors": [],
"missing": {},
"outline": {
"sections": [
{
"h2": "Introduction to shaft machining",
"h3s": [
"What is shaft machining?",
"Why is precision important in shaft machining?",
"Overview of common shaft machining processes"
]
},
{
"h2": "Key shaft machining techniques for optimal performance",
"h3s": [
"CNC turning: An overview of the process",
"Milling techniques in shaft machining",
"Boring for precision hole-making",
"Broaching: A method for complex shapes",
"Understanding die casting in shaft fabrication"
]
},
{
"h2": "Materials used in shaft machining",
"h3s": [
"Stainless steel: Benefits and applications",
"Brass in shaft machining: Characteristics and uses",
"Choosing the right steel for your project"
]
},
{
"h2": "Challenges in shaft machining and solutions",
"h3s": [
"How to address machining tolerances?",
"Dealing with torque and its impact on shaft design",
"Optimizing the supply chain for machining services"
]
},
{
"h2": "Frequently Asked Questions",
"h3s": [
"What are the common machining processes for shafts?",
"How can custom shaft machining meet specific project needs?",
"What factors influence the cost of shaft machining services?"
]
}
]
}
}

View File

@ -2,7 +2,7 @@
DATABASE_URL=sqlite:///./content_automation.db
# AI Service Configuration (OpenRouter)
AI_API_KEY=your_openrouter_api_key_here
AI_API_KEY=your_openrouter_api_key_here
AI_API_BASE_URL=https://openrouter.ai/api/v1
AI_MODEL=anthropic/claude-3.5-sonnet

View File

@ -1,6 +1,6 @@
{
"job_name": "Multi-Tier Site Build",
"project_id": 1,
"project_id": 2,
"description": "Complete site build with 165 articles across 3 tiers",
"tiers": [
{
@ -8,8 +8,8 @@
"article_count": 15,
"models": {
"title": "openai/gpt-4o-mini",
"outline": "anthropic/claude-3.5-sonnet",
"content": "anthropic/claude-3.5-sonnet"
"outline": "openai/gpt-4o-mini",
"content": "anthropic/claude-sonnet-4.5"
},
"anchor_text_config": {
"mode": "default"
@ -21,8 +21,8 @@
"article_count": 50,
"models": {
"title": "openai/gpt-4o-mini",
"outline": "openai/gpt-4o",
"content": "openai/gpt-4o"
"outline": "openai/gpt-4o-mini",
"content": "openai/gpt-4o-mini"
},
"anchor_text_config": {
"mode": "append",
@ -36,7 +36,7 @@
"models": {
"title": "openai/gpt-4o-mini",
"outline": "openai/gpt-4o-mini",
"content": "anthropic/claude-3-haiku"
"content": "openai/gpt-4o-mini"
},
"anchor_text_config": {
"mode": "default"
@ -45,7 +45,7 @@
}
],
"failure_config": {
"max_consecutive_failures": 10,
"max_consecutive_failures": 3,
"skip_on_failure": true
},
"interlinking": {

View File

@ -0,0 +1,31 @@
{
"job_name": "Tier 1 Launch - Project 2",
"project_id": 2,
"description": "Generate 15 high-quality Tier 1 articles",
"tiers": [
{
"tier": 1,
"article_count": 15,
"models": {
"title": "openai/gpt-4o-mini",
"outline": "anthropic/claude-3.5-sonnet",
"content": "anthropic/claude-3.5-sonnet"
},
"anchor_text_config": {
"mode": "default",
"custom_text": null,
"additional_text": null
},
"validation_attempts": 3
}
],
"failure_config": {
"max_consecutive_failures": 5,
"skip_on_failure": true
},
"interlinking": {
"links_per_article_min": 2,
"links_per_article_max": 4,
"include_home_link": true
}
}

View File

@ -13,25 +13,27 @@
"ai_service": {
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"model": "anthropic/claude-3.5-sonnet",
"model": "anthropic/claude-sonnet-4.5",
"max_tokens": 4000,
"temperature": 0.7,
"timeout": 30,
"available_models": {
"claude-sonnet-4.5": "anthropic/claude-sonnet-4.5",
"claude-3.5-sonnet": "anthropic/claude-3.5-sonnet",
"claude-3-haiku": "anthropic/claude-3-haiku",
"gpt-4o": "openai/gpt-4o",
"gpt-4o-mini": "openai/gpt-4o-mini",
"llama-3.1-70b": "meta-llama/llama-3.1-70b-instruct",
"llama-3.1-8b": "meta-llama/llama-3.1-8b-instruct",
"gemini-pro": "google/gemini-pro-1.5"
"gemini-2.5-flash": "google/gemini-2.5-flash"
}
},
"content_rules": {
"universal": {
"min_content_length": 1000,
"max_content_length": 5000,
"title_exact_match_required": true,
"word_count_tolerance": 10,
"default_term_frequency": 2,
"title_exact_match_required": false,
"h1_exact_match_required": true,
"h2_exact_match_min": 1,
"h3_exact_match_min": 1,

View File

@ -1,43 +1,44 @@
# Core Framework
fastapi==0.104.1
uvicorn[standard]==0.24.0
fastapi==0.119.0
uvicorn==0.38.0
# CLI Framework
click==8.1.7
typer==0.9.0
click==8.3.0
typer==0.19.2
# Database
sqlalchemy==2.0.23
alembic==1.12.1
sqlalchemy==2.0.44
alembic==1.17.0
# Authentication
passlib[bcrypt]==1.7.4
bcrypt==4.0.1 # Compatible with passlib 1.7.4
python-jose[cryptography]==3.3.0
bcrypt==4.0.1
python-jose==3.5.0
cryptography==46.0.3
# Configuration
pydantic==2.5.0
python-dotenv==1.0.0
pydantic==2.12.3
python-dotenv==1.1.1
# Cloud Providers
boto3==1.34.0
azure-storage-blob==12.19.0
requests==2.31.0
boto3==1.40.55
azure-storage-blob==12.27.0
requests==2.32.5
# Data Processing
pandas==2.1.4
openpyxl==3.1.2
beautifulsoup4==4.12.2
pandas==2.3.3
openpyxl==3.1.5
beautifulsoup4==4.14.2
# AI/ML
openai==1.3.7
openai==2.5.0
# Testing
pytest==7.4.3
pytest==8.4.2
pytest-asyncio==0.21.1
pytest-mock==3.12.0
httpx==0.25.2 # Required for FastAPI TestClient
moto==4.2.14
pytest-mock==3.15.1
httpx==0.28.1
moto==5.1.15
# Development
black==23.11.0

View File

@ -874,9 +874,10 @@ def list_projects(username: Optional[str], password: Optional[str]):
@app.command()
@click.option("--job-file", "-j", required=True, help="Path to job configuration JSON file")
@click.option("--force-regenerate", "-f", is_flag=True, help="Force regeneration even if content exists")
@click.option("--debug", "-d", is_flag=True, help="Enable debug mode (saves generated content to debug_output/)")
@click.option("--username", "-u", help="Username for authentication")
@click.option("--password", "-p", help="Password for authentication")
def generate_batch(job_file: str, force_regenerate: bool, username: Optional[str], password: Optional[str]):
def generate_batch(job_file: str, force_regenerate: bool, debug: bool, username: Optional[str], password: Optional[str]):
"""
Generate batch of articles from a job configuration file
@ -919,8 +920,21 @@ def generate_batch(job_file: str, force_regenerate: bool, username: Optional[str
click.echo("\nStarting batch generation...")
click.echo("-" * 80)
def progress_callback(tier, article_num, total, status, **kwargs):
if status == "starting":
def progress_callback(tier=None, article_num=None, total=None, status=None, stage=None, **kwargs):
if stage:
if status == "completed":
if stage == "title":
title = kwargs.get("title", "")
click.echo(f" - Title generated: {title}")
elif stage == "outline":
outline = kwargs.get("outline", {})
h2_count = len(outline.get("sections", []))
h3_count = sum(len(s.get("h3s", [])) for s in outline.get("sections", []))
click.echo(f" - Outline generated: {h2_count} H2s, {h3_count} H3s")
elif stage == "content":
word_count = kwargs.get("word_count", 0)
click.echo(f" - Content generated: {word_count} words")
elif status == "starting":
click.echo(f"[Tier {tier}] Article {article_num}/{total}: Generating...")
elif status == "completed":
content_id = kwargs.get("content_id", "?")
@ -932,8 +946,11 @@ def generate_batch(job_file: str, force_regenerate: bool, username: Optional[str
error = kwargs.get("error", "Unknown error")
click.echo(f"[Tier {tier}] Article {article_num}/{total}: Failed - {error}", err=True)
if debug:
click.echo("\n[DEBUG MODE ENABLED - Content will be saved to debug_output/]\n")
processor = BatchProcessor(session)
result = processor.process_job(job_config, progress_callback)
result = processor.process_job(job_config, progress_callback, debug=debug)
click.echo("-" * 80)
click.echo("\nBatch Generation Complete!")

View File

@ -30,6 +30,8 @@ class AIServiceConfig(BaseModel):
class UniversalRulesConfig(BaseModel):
min_content_length: int = 1000
max_content_length: int = 5000
word_count_tolerance: int = 10
default_term_frequency: int = 2
title_exact_match_required: bool = True
h1_exact_match_required: bool = True
h2_exact_match_min: int = 1

View File

@ -7,6 +7,7 @@ from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError
from src.database.interfaces import IUserRepository, ISiteDeploymentRepository, IProjectRepository, IGeneratedContentRepository
from src.database.models import User, SiteDeployment, Project, GeneratedContent
from src.core.config import get_config
class UserRepository(IUserRepository):
@ -266,12 +267,13 @@ class ProjectRepository(IProjectRepository):
Raises:
ValueError: If user_id doesn't exist
"""
config = get_config()
project = Project(
user_id=user_id,
name=name,
main_keyword=data.get("main_keyword"),
word_count=data.get("word_count", 1250),
term_frequency=data.get("term_frequency", 3),
term_frequency=data.get("term_frequency") or config.content_rules.universal.default_term_frequency,
related_search_density=data.get("related_search_density"),
entity_density=data.get("entity_density"),
lsi_density=data.get("lsi_density"),

View File

@ -6,6 +6,7 @@ import os
import json
from typing import Dict, Any, Optional
from openai import OpenAI
from dotenv import load_dotenv
from src.core.config import Config
@ -24,6 +25,8 @@ class AIClient:
Args:
config: Application configuration (uses get_config() if None)
"""
load_dotenv()
from src.core.config import get_config
self.config = config or get_config()
@ -31,9 +34,14 @@ class AIClient:
if not api_key:
raise AIClientError("AI_API_KEY environment variable not set")
# OpenRouter requires specific headers and configuration
self.client = OpenAI(
base_url=self.config.ai_service.base_url,
api_key=api_key,
default_headers={
"HTTP-Referer": "https://github.com/yourusername/Big-Link-Man",
"X-Title": "Big Link Man Content Generator"
}
)
self.default_model = self.config.ai_service.model

View File

@ -4,16 +4,23 @@ Content augmentation service for programmatic CORA target fixes
import re
import random
from typing import List, Dict, Any, Tuple
import json
from pathlib import Path
from typing import List, Dict, Any, Tuple, Optional, TYPE_CHECKING
from bs4 import BeautifulSoup
from src.generation.rule_engine import ContentHTMLParser
if TYPE_CHECKING:
from src.generation.ai_client import AIClient
class ContentAugmenter:
"""Service for programmatically augmenting content to meet CORA targets"""
def __init__(self):
def __init__(self, ai_client: Optional['AIClient'] = None):
self.parser = ContentHTMLParser()
self.ai_client = ai_client
self.prompts_dir = Path(__file__).parent / "prompts"
def augment_outline(
self,
@ -165,6 +172,142 @@ class ContentAugmenter:
return html_content, log
def augment_content_with_ai(
    self,
    html_content: str,
    missing: Dict[str, Any],
    main_keyword: str,
    entities: List[str],
    related_searches: List[str],
    model: str = "gpt-4o-mini"
) -> Tuple[str, Dict[str, Any]]:
    """
    Use AI to generate additional paragraphs to augment content

    The new paragraphs are inserted after the last <p> that follows the
    final <h2>; if that section has no paragraphs they go directly after
    the <h2>, and if the document has no <h2> they are appended at the end.

    Args:
        html_content: Current HTML content
        missing: Dictionary of missing elements; "word_count_deficit" is
            read to size the request (deficit + 100 words)
        main_keyword: Main keyword
        entities: List of entities (only the first 5 are sent to the model)
        related_searches: List of related searches (only the first 5 are sent)
        model: AI model to use

    Returns:
        Tuple of (augmented_html, augmentation_log). If the AI call or the
        HTML manipulation fails, the original html_content is returned
        unchanged and the failure is recorded in the log's "changes" list.

    Raises:
        ValueError: If no AI client was provided, or the prompt template
            file cannot be found
    """
    if not self.ai_client:
        raise ValueError("AI client not provided to augmenter")

    log = {
        "method": "ai_augmentation",
        "changes": [],
        "paragraphs_added": 0,
        "words_added": 0
    }

    prompt_template = self._load_prompt("content_augmentation.json")

    missing_description = self._format_missing_elements(missing)
    target_entities = ", ".join(entities[:5]) if entities else "N/A"
    target_searches = ", ".join(related_searches[:5]) if related_searches else "N/A"

    word_count_deficit = missing.get("word_count_deficit", 0)
    target_word_count = word_count_deficit + 100

    # Parse exactly once. Every tag used as an insertion anchor below must
    # belong to the same tree that is serialized in the return statement;
    # re-parsing into a second tree would silently drop the insertions.
    soup = BeautifulSoup(html_content, 'html.parser')
    h2_tags = soup.find_all('h2')
    last_h2 = h2_tags[-1] if h2_tags else None
    suggested_placement = (
        f"After the section: '{last_h2.get_text()}'"
        if last_h2 is not None
        else "At the end of the article"
    )

    prompt = prompt_template["user_template"].format(
        current_content=html_content,
        missing_elements=missing_description,
        main_keyword=main_keyword,
        target_entities=target_entities,
        target_searches=target_searches,
        target_word_count=target_word_count,
        suggested_placement=suggested_placement
    )

    # The prompt asks for at least target_word_count words, so the token
    # budget has to grow with it; a fixed 500-token cap truncates larger
    # requests. ~2 tokens per word leaves headroom for the <p> markup.
    max_tokens = max(500, int(target_word_count * 2))

    try:
        new_paragraphs = self.ai_client.generate(
            prompt=prompt,
            model=model,
            temperature=0.7,
            max_tokens=max_tokens
        )
        new_paragraphs = new_paragraphs.strip()

        new_para_tags = BeautifulSoup(new_paragraphs, 'html.parser').find_all('p')
        words_added = sum(len(p.get_text().split()) for p in new_para_tags)

        # Anchor on the last paragraph of the final section, falling back
        # to the final <h2> itself when the section has no paragraphs.
        anchor = last_h2
        if last_h2 is not None:
            for sibling in last_h2.next_siblings:
                sibling_name = getattr(sibling, "name", None)
                if sibling_name == 'h2':
                    break
                if sibling_name == 'p':
                    anchor = sibling

        if anchor is not None:
            # Inserting in reverse after a fixed anchor keeps the
            # paragraphs in the order the model produced them.
            for para in reversed(new_para_tags):
                anchor.insert_after(para)
        else:
            for para in new_para_tags:
                soup.append(para)

        log["paragraphs_added"] = len(new_para_tags)
        log["words_added"] = words_added
        log["target_word_count"] = target_word_count
        log["word_count_deficit"] = word_count_deficit
        log["changes"].append(f"Added {len(new_para_tags)} AI-generated paragraph(s) with ~{words_added} words (target: {target_word_count}, deficit: {word_count_deficit})")

        return str(soup), log

    except Exception as e:
        # Deliberate best-effort: report the failure in the log and hand
        # back the untouched input so the caller can decide what to do.
        log["changes"].append(f"AI augmentation failed: {str(e)}")
        return html_content, log
def _load_prompt(self, filename: str) -> Dict[str, Any]:
"""Load prompt template from JSON file"""
prompt_path = self.prompts_dir / filename
if not prompt_path.exists():
raise ValueError(f"Prompt template not found: {filename}")
with open(prompt_path, 'r', encoding='utf-8') as f:
return json.load(f)
def _format_missing_elements(self, missing: Dict[str, Any]) -> str:
"""Format missing elements into readable description"""
descriptions = []
if missing.get("word_count_deficit"):
descriptions.append(f"Need {missing['word_count_deficit']} more words to meet target")
if missing.get("keyword_mentions"):
descriptions.append(f"Need {missing['keyword_mentions']} more keyword mentions")
if missing.get("entity_mentions"):
descriptions.append(f"Need {missing['entity_mentions']} more entity mentions")
if missing.get("related_search_mentions"):
descriptions.append(f"Need {missing['related_search_mentions']} more related search mentions")
return "; ".join(descriptions) if descriptions else "General content enhancement needed"
def _insert_keywords_in_sentences(
self,
soup: BeautifulSoup,

View File

@ -35,7 +35,8 @@ class BatchProcessor:
def process_job(
self,
job_config: JobConfig,
progress_callback: Optional[callable] = None
progress_callback: Optional[callable] = None,
debug: bool = False
) -> JobResult:
"""
Process a batch job according to configuration
@ -83,7 +84,9 @@ class BatchProcessor:
title_model=tier_config.models.title,
outline_model=tier_config.models.outline,
content_model=tier_config.models.content,
max_retries=tier_config.validation_attempts
max_retries=tier_config.validation_attempts,
progress_callback=progress_callback,
debug=debug
)
result.successful += 1

View File

@ -1,9 +1,9 @@
{
"system": "You are an SEO content enhancement specialist who adds natural, relevant paragraphs to articles to meet optimization targets.",
"user_template": "Add a new paragraph to the following article to address these missing elements:\n\nCurrent Article:\n{current_content}\n\nWhat's Missing:\n{missing_elements}\n\nMain Keyword: {main_keyword}\nEntities to use: {target_entities}\nRelated Searches to reference: {target_searches}\n\nInstructions:\n1. Write ONE substantial paragraph (100-150 words)\n2. Naturally incorporate the missing keywords/entities/searches\n3. Make it relevant to the article topic\n4. Use a professional, engaging tone\n5. Don't repeat information already in the article\n6. The paragraph should feel like a natural addition\n\nSuggested placement: {suggested_placement}\n\nRespond with ONLY the new paragraph in HTML format:\n<p>Your new paragraph here...</p>\n\nDo not include the entire article, just the new paragraph to insert.",
"system": "You are a content enhancement specialist who adds natural, relevant paragraphs to articles to meet optimization targets.",
"user_template": "Add new paragraph(s) to the following article to address these missing elements:\n\nCurrent Article:\n{current_content}\n\nWhat's Missing:\n{missing_elements}\n\nMain Keyword: {main_keyword}\nEntities to use: {target_entities}\nRelated Searches to reference: {target_searches}\nTarget Word Count for New Content: {target_word_count} words\n\nInstructions:\n1. Write {target_word_count} words of new content (1-3 paragraphs as needed)\n2. Naturally incorporate the missing keywords/entities/searches\n3. Make it relevant to the article topic\n4. Use a professional, engaging tone\n5. Don't directly repeat information already in the article\n6. The paragraphs should feel like natural additions\n7. IMPORTANT: Write at least {target_word_count} words to ensure we meet the target\n\nSuggested placement: {suggested_placement}\n\nRespond with ONLY the new paragraph(s) in HTML format:\n<p>First paragraph here...</p>\n<p>Second paragraph here...</p>\n\nDo not include the entire article, just the new paragraph(s) to insert.",
"validation": {
"output_format": "html",
"is_single_paragraph": true
"output_format": "html"
}
}

View File

@ -1,6 +1,6 @@
{
"system": "You are an expert content writer who creates comprehensive, engaging articles that strictly follow the provided outline and meet all CORA optimization requirements.",
"user_template": "Write a complete, SEO-optimized article following this outline:\n\n{outline}\n\nArticle Details:\n- Title: {title}\n- Main Keyword: {main_keyword}\n- Target Token Count: {word_count}\n- Keyword Frequency Target: {term_frequency} mentions\n\nEntities to incorporate: {entities}\nRelated Searches to reference: {related_searches}\n\nCritical Requirements:\n1. Follow the outline structure EXACTLY - use the provided H2 and H3 headings word-for-word\n2. Do NOT add numbering, Roman numerals, or letters to the headings\n3. The article must be {word_count} words long (±100 tokens)\n4. Mention the main keyword \"{main_keyword}\" naturally {term_frequency} times throughout\n5. Write 2-3 substantial paragraphs under each heading\n6. For the FAQ section:\n - Each FAQ answer MUST begin by restating the question\n - Provide detailed, helpful answers (100-150 words each)\n7. Incorporate entities and related searches naturally throughout\n8. Write in a professional, engaging tone\n9. Make content informative and valuable to readers\n10. Use varied sentence structures and vocabulary\n\nFormatting Requirements:\n- Use <h1> for the main title\n- Use <h2> for major sections\n- Use <h3> for subsections\n- Use <p> for paragraphs\n- Use <ul> and <li> for lists where appropriate\n- Do NOT include any CSS, <html>, <head>, or <body> tags\n- Return ONLY the article content HTML\n\nExample structure:\n<h1>Main Title</h1>\n<p>Introduction paragraph...</p>\n\n<h2>First Section</h2>\n<p>Content...</p>\n\n<h3>Subsection</h3>\n<p>More content...</p>\n\nWrite the complete article now.",
"system": "You are a creative content writer who creates comprehensive, engaging articles that strictly follow the provided outline and meet all CORA optimization requirements.",
"user_template": "Write a complete, SEO-optimized article following this outline:\n\n{outline}\n\nArticle Details:\n- Title: {title}\n- Main Keyword: {main_keyword}\n- Target Token Count: {word_count}\n- Keyword Frequency Target: {term_frequency}% mentions\n\nEntities to incorporate: {entities}\nRelated Searches to reference: {related_searches}\n\nCritical Requirements:\n1. Follow the outline structure EXACTLY - use the provided H2 and H3 headings word-for-word\n2. Do NOT add numbering, Roman numerals, or letters to the headings\n3. The article must be {word_count} tokens long (±100 tokens)\n4. Mention the main keyword \"{main_keyword}\" naturally {term_frequency}% times throughout\n5. Write 2-3 substantial paragraphs under each heading. Reference industry standards, regulations, or best practices. Use relevant LSI and entities for the topic\n6. For the FAQ section:\n - Each FAQ answer MUST begin by restating the question\n - Provide detailed, helpful answers (100-150 words each)\n7. Incorporate entities and related searches naturally throughout\n8. Write in a professional, engaging tone. Use active voice for 80% of sentences\n9. Make content informative and valuable to readers. Use technical terminology appropriate for industry professionals.\n10. Use varied sentence structures and vocabulary.\n11. 
STRICTLY PROHIBITED: Filler phrases: 'it is important to note', 'as mentioned earlier', 'in conclusion' - Marketing language: 'revolutionary', 'game-changing', 'industry-leading', 'best-in-class' - Generic openings: 'In today's world', 'As we all know', 'It goes without saying' \n\nFormatting Requirements:\n- Use <h1> for the main title\n- Use <h2> for major sections\n- Use <h3> for subsections\n- Use <p> for paragraphs\n- Use <ul> and <li> for lists where appropriate\n- Do NOT include any CSS, <html>, <head>, or <body> tags\n- Return ONLY the article content HTML\n\nExample structure:\n<h1>Main Title</h1>\n<p>Introduction paragraph...</p>\n\n<h2>First Section</h2>\n<p>Content...</p>\n\n<h3>Subsection</h3>\n<p>More content...</p>\n\nWrite the complete article now.",
"validation": {
"output_format": "html",
"min_word_count": true,

View File

@ -1,5 +1,5 @@
{
"system": "You are an SEO optimization expert who adjusts article outlines to meet specific CORA targets while maintaining natural flow.",
"system": "You are a Content expert who adjusts article outlines to meet specific CORA targets while maintaining natural flow. You are very detail-orientated and will not miss any targets.",
"user_template": "Modify the following article outline to meet the required CORA targets:\n\nCurrent Outline:\n{current_outline}\n\nValidation Issues:\n{validation_issues}\n\nWhat needs to be added/changed:\n{missing_elements}\n\nCORA Targets:\n- H2 total needed: {h2_total}\n- H2s with main keyword \"{main_keyword}\": {h2_exact}\n- H2s with entities: {h2_entities}\n- H2s with related searches: {h2_related_search}\n- H3 total needed: {h3_total}\n- H3s with main keyword: {h3_exact}\n- H3s with entities: {h3_entities}\n- H3s with related searches: {h3_related_search}\n\nAvailable Entities: {entities}\nRelated Searches: {related_searches}\n\nInstructions:\n1. Add missing H2 or H3 headings as needed\n2. Modify existing headings to include required keywords/entities/searches\n3. Maintain logical flow and structure\n4. Keep the first H2 with the main keyword if possible\n5. Ensure FAQ section remains intact\n6. Meet ALL CORA targets exactly\n\nIMPORTANT FORMATTING RULES:\n- Do NOT include numbering (1., 2., 3.)\n- Do NOT include Roman numerals (I., II., III.)\n- Do NOT include letters (A., B., C.)\n- Do NOT include any outline-style prefixes\n- Return clean heading text only\n\nRespond in the same JSON format:\n{{\n \"h1\": \"The main H1 heading\",\n \"sections\": [\n {{\n \"h2\": \"H2 heading text\",\n \"h3s\": [\"H3 heading 1\", \"H3 heading 2\"]\n }}\n ]\n}}\n\nReturn the complete modified outline.",
"validation": {
"output_format": "json",

View File

@ -1,9 +1,9 @@
{
"system": "You are an expert SEO content strategist who creates detailed, keyword-rich article outlines that meet strict CORA optimization targets.",
"user_template": "Create a detailed article outline for the following:\n\nTitle: {title}\nMain Keyword: {main_keyword}\nTarget Word Count: {word_count}\n\nCORA Targets:\n- H2 headings needed: {h2_total}\n- H2s with main keyword: {h2_exact}\n- H2s with related searches: {h2_related_search}\n- H2s with entities: {h2_entities}\n- H3 headings needed: {h3_total}\n- H3s with main keyword: {h3_exact}\n- H3s with related searches: {h3_related_search}\n- H3s with entities: {h3_entities}\n\nAvailable Entities: {entities}\nRelated Searches: {related_searches}\n\nRequirements:\n1. Create exactly {h2_total} H2 headings\n2. Create exactly {h3_total} H3 headings (distributed under H2s)\n3. At least {h2_exact} H2s must contain the exact keyword \"{main_keyword}\"\n4. The FIRST H2 should contain the main keyword\n5. Incorporate entities and related searches naturally into headings\n6. Include a \"Frequently Asked Questions\" H2 section with at least 3 H3 questions\n7. Each H3 question should be a complete question ending with ?\n8. Structure should flow logically\n\nIMPORTANT FORMATTING RULES:\n- Do NOT include numbering (1., 2., 3.)\n- Do NOT include Roman numerals (I., II., III.)\n- Do NOT include letters (A., B., C.)\n- Do NOT include any outline-style prefixes\n- Return clean heading text only\n\nWRONG: \"I. Introduction to {main_keyword}\"\nWRONG: \"1. Getting Started with {main_keyword}\"\nRIGHT: \"Introduction to {main_keyword}\"\nRIGHT: \"Getting Started with {main_keyword}\"\n\nRespond in JSON format:\n{{\n \"h1\": \"The main H1 heading (should contain main keyword)\",\n \"sections\": [\n {{\n \"h2\": \"H2 heading text\",\n \"h3s\": [\"H3 heading 1\", \"H3 heading 2\"]\n }}\n ]\n}}\n\nEnsure all CORA targets are met. Be precise with the numbers.",
"system": "You are an expert content strategist who creates detailed, specific article outlines that provide clear direction for content creation. You also strive to meet strict CORA optimization targets.",
"user_template": "Create a detailed article outline for the following:\n\nTitle: {title}\nMain Keyword: {main_keyword}\nTarget Word Count: {word_count}\n\nCORA Targets:\n- H2 headings needed: {h2_total}\n- H2s with main keyword: {h2_exact}\n- H2s with related searches: {h2_related_search}\n- H2s with entities: {h2_entities}\n- H3 headings needed: {h3_total}\n- H3s with main keyword: {h3_exact}\n- H3s with related searches: {h3_related_search}\n- H3s with entities: {h3_entities}\n\nAvailable Entities: {entities}\nRelated Searches: {related_searches}\n\nThe title provided above will serve as the H1 heading for this article. Focus on creating the H2 and H3 structure that supports this title.\n\nRequirements:\n1. Create exactly {h2_total} H2 headings\n2. Create exactly {h3_total} H3 headings (distributed under H2s)\n3. At least {h2_exact} H2s must contain the exact keyword \"{main_keyword}\"\n4. The FIRST H2 should contain the main keyword\n5. Incorporate entities and related searches naturally into headings\n6. Include a \"Frequently Asked Questions\" H2 section with at least 3 H3 questions\n7. Each H3 question should be a complete question ending with ?\n8. Structure should flow logically\n9. Create headings that build logically toward actionable insights\n10. Use specific, searchable language over generic terms\n11. Include sub-topic hints in parentheses where helpful\n12. Focus on reader problems and solutions.\n13. FORBIDDEN ELEMENTS: Future-tense speculation ('The Future of...', 'Upcoming Trends') - Generic business-speak ('in today's competitive landscape', 'cutting-edge solutions') - Vague qualifiers ('best practices', 'industry-leading', 'world-class')\n\nIMPORTANT FORMATTING RULES:\n- Do NOT include numbering (1., 2., 3.)\n- Do NOT include Roman numerals (I., II., III.)\n- Do NOT include letters (A., B., C.)\n- Do NOT include any outline-style prefixes\n- Return clean heading text only\n\nWRONG: \"I. Introduction to {main_keyword}\"\nWRONG: \"1. 
Getting Started with {main_keyword}\"\nRIGHT: \"Introduction to {main_keyword}\"\nRIGHT: \"Getting Started with {main_keyword}\"\n\nRespond ONLY with valid JSON in this exact format (no additional text, explanations, or commentary):\n{{\n \"sections\": [\n {{\n \"h2\": \"H2 heading text\",\n \"h3s\": [\"H3 heading 1\", \"H3 heading 2\"]\n }}\n ]\n}}\n\nReturn ONLY the JSON object. Do not include any text before or after the JSON.",
"validation": {
"output_format": "json",
"required_fields": ["h1", "sections"],
"required_fields": ["sections"],
"h2_count_must_match": true,
"h3_count_must_match": true
}

View File

@ -1,10 +1,10 @@
{
"system": "You are an expert SEO content writer specializing in creating compelling, keyword-optimized titles that drive organic traffic.",
"user_template": "Generate an SEO-optimized title for an article about \"{main_keyword}\".\n\nContext:\n- Main Keyword: {main_keyword}\n- Target Word Count: {word_count}\n- Top Entities: {entities}\n- Related Searches: {related_searches}\n\nRequirements:\n1. The title MUST contain the exact main keyword: \"{main_keyword}\"\n2. The title should be compelling and click-worthy\n3. Keep it between 50-70 characters for optimal SEO\n4. Make it natural and engaging, not keyword-stuffed\n5. Consider incorporating 1-2 related entities or searches if natural\n\nRespond with ONLY the title text, no quotes or additional formatting.\n\nExample format: \"Complete Guide to {main_keyword}: Tips and Best Practices\"",
"system": "You are an expert content strategist who creates compelling, specific article titles that provide clear direction for content creation.",
"user_template": "Generate a unique, compelling article title for the broad topic: \"{main_keyword}\".\n\nContext:\n- Main Keyword: {main_keyword}\n- Top Entities: {entities}\n- Related Searches: {related_searches}\n\nRequirements:\n1. The title MUST contain the exact main keyword: \"{main_keyword}\"\n2. The title should be compelling and click-worthy\n3. The title must be specific enough that an AI could create a substantial, focused content outline from the title alone\n4. Titles should be creative yet professionally relevant to: {main_keyword}. It does not have to be directly related but must be at least tangentially related.\n5. Consider incorporating 1-2 related entities or searches if natural\n6. Mix formats: how-to guides (25%), case studies (10%), expert analyses (20%), comparison pieces (15%), trend analyses (10%), problem-solving articles (10%), listicles (10%)\n7. Avoid generic business jargon and AI slop (cutting-edge, game-changing, revolutionary)\n8. Use domain-specific terminology appropriate for an article about {main_keyword}\n9. Include specific, actionable language that suggests clear content direction\n\nRespond with ONLY the title text, no quotes or additional formatting.\n\nExample format: \"Complete Guide to {main_keyword}: Tips and Best Practices\"",
"validation": {
"must_contain_keyword": true,
"min_length": 30,
"max_length": 100
"max_length": 120
}
}

View File

@ -44,7 +44,7 @@ class ContentGenerationService:
self.content_repo = GeneratedContentRepository(session)
self.rule_engine = ContentRuleEngine(self.config)
self.validator = StageValidator(self.config, self.rule_engine)
self.augmenter = ContentAugmenter()
self.augmenter = ContentAugmenter(ai_client=self.ai_client)
self.prompts_dir = Path(__file__).parent / "prompts"
@ -55,7 +55,9 @@ class ContentGenerationService:
title_model: str,
outline_model: str,
content_model: str,
max_retries: int = 3
max_retries: int = 3,
progress_callback: Optional[callable] = None,
debug: bool = False
) -> GeneratedContent:
"""
Generate complete article through three-stage pipeline
@ -67,6 +69,8 @@ class ContentGenerationService:
outline_model: Model for outline generation
content_model: Model for content generation
max_retries: Max retry attempts per stage
progress_callback: Optional callback for progress updates
debug: Enable debug output
Returns:
GeneratedContent record with completed article
@ -272,7 +276,7 @@ class ContentGenerationService:
title=title,
main_keyword=project.main_keyword,
word_count=project.word_count,
term_frequency=project.term_frequency or 3,
term_frequency=project.term_frequency or self.config.content_rules.universal.default_term_frequency,
entities=entities_str,
related_searches=searches_str
)
@ -304,23 +308,25 @@ class ContentGenerationService:
return html_content
if attempt < max_retries:
missing = self.validator.extract_missing_elements(validation_result, project)
missing = self.validator.extract_missing_elements(validation_result, project, html_content)
has_word_deficit = missing.get("word_count_deficit", 0) > 0
if missing and any(missing.values()):
augmented_html, aug_log = self.augmenter.augment_content(
html_content, missing, project.main_keyword,
project.entities or [], project.related_searches or []
)
is_valid_aug, validation_result_aug = self.validator.validate_content(
augmented_html, project
)
if is_valid_aug:
if has_word_deficit:
try:
augmented_html, aug_log = self.augmenter.augment_content_with_ai(
html_content, missing, project.main_keyword,
project.entities or [], project.related_searches or [],
model=model
)
is_valid_aug, validation_result_aug = self.validator.validate_content(
augmented_html, project
)
content_record.content = augmented_html
content_record.augmented = True
existing_log = content_record.augmentation_log or {}
existing_log["content_augmentation"] = aug_log
existing_log["content_ai_augmentation"] = aug_log
content_record.augmentation_log = existing_log
content_record.validation_errors = len(validation_result_aug.errors)
content_record.validation_warnings = len(validation_result_aug.warnings)
@ -328,10 +334,26 @@ class ContentGenerationService:
word_count = len(augmented_html.split())
content_record.word_count = word_count
self.content_repo.update(content_record)
return augmented_html
error_summary = ", ".join([e.message for e in validation_result.errors[:5]])
prompt += f"\n\nPrevious content failed validation: {error_summary}. Please fix these issues."
missing_after = self.validator.extract_missing_elements(validation_result_aug, project, augmented_html)
still_short = missing_after.get("word_count_deficit", 0) > 0
if not still_short:
return augmented_html
html_content = augmented_html
validation_result = validation_result_aug
except Exception as e:
print(f"AI augmentation failed: {e}")
error_summary = f"Word count too short. AI augmentation failed: {str(e)}"
prompt += f"\n\nPrevious content failed validation: {error_summary}. Generate MORE content to meet the word count target."
else:
content_record.content = html_content
word_count = len(html_content.split())
content_record.word_count = word_count
self.content_repo.update(content_record)
return html_content
except AIClientError as e:
if attempt == max_retries:

View File

@ -53,8 +53,8 @@ class StageValidator:
if len(title) < 30:
errors.append(f"Title too short: {len(title)} chars (min 30)")
if len(title) > 100:
errors.append(f"Title too long: {len(title)} chars (max 100)")
if len(title) > 120:
errors.append(f"Title too long: {len(title)} chars (max 120)")
if project.main_keyword.lower() not in title.lower():
errors.append(f"Title must contain main keyword: '{project.main_keyword}'")
@ -196,7 +196,8 @@ class StageValidator:
def extract_missing_elements(
self,
validation_result: ValidationResult,
project: Project
project: Project,
html_content: str
) -> Dict[str, Any]:
"""
Extract specific missing elements from validation result
@ -204,22 +205,43 @@ class StageValidator:
Args:
validation_result: Validation result from rule engine
project: Project with CORA data
html_content: HTML content to calculate word count
Returns:
Dictionary of missing elements with counts
"""
missing = {}
parser = ContentHTMLParser()
parser.feed(html_content)
current_word_count = len(parser.text_content.split())
for error in validation_result.errors:
msg = error.message.lower()
if "too short" in msg or "content is too short" in msg:
try:
target_word_count = int(project.word_count) if project.word_count else 1000
tolerance = self.config.content_rules.universal.word_count_tolerance
min_word_count = int(target_word_count * (1 - tolerance / 100))
deficit = max(0, min_word_count - current_word_count)
missing["word_count_deficit"] = deficit
except:
missing["word_count_deficit"] = 100
if "keyword" in msg and "mention" in msg:
try:
parts = msg.split("found")
if len(parts) > 1:
found = int(parts[1].split()[0])
target = project.term_frequency or 3
missing["keyword_mentions"] = max(0, target - found)
# Calculate target count based on percentage
target_percentage = project.term_frequency or self.config.content_rules.universal.default_term_frequency
# Parse HTML to get text content only (same as rule engine)
parser = ContentHTMLParser()
parser.feed(html_content)
total_words = len(parser.text_content.split())
target_count = int((target_percentage / 100) * total_words)
missing["keyword_mentions"] = max(0, target_count - found)
except:
missing["keyword_mentions"] = 1

View File

@ -0,0 +1,360 @@
"""
Content generation service - orchestrates the three-stage AI generation pipeline
"""
import time
import json
from pathlib import Path
from typing import Dict, Any, Optional, Tuple
from src.database.models import Project, GeneratedContent
from src.database.repositories import GeneratedContentRepository
from src.generation.ai_client import AIClient, AIClientError
from src.generation.validator import StageValidator
from src.generation.augmenter import ContentAugmenter
from src.generation.rule_engine import ContentRuleEngine
from src.core.config import Config, get_config
from sqlalchemy.orm import Session
class GenerationError(Exception):
    """Error raised when the content generation pipeline cannot complete a stage."""
class ContentGenerationService:
    """Service for AI-powered content generation with validation.

    Runs a three-stage pipeline (title -> outline -> content). Every stage
    calls the AI client, validates the result and, on failure, appends the
    validation feedback to the prompt before the next attempt. The outline
    and content stages also try the augmenter before asking the model again.
    """

    def __init__(
        self,
        session: Session,
        config: Optional[Config] = None,
        ai_client: Optional[AIClient] = None
    ):
        """
        Initialize service

        Args:
            session: Database session
            config: Application configuration (falls back to get_config())
            ai_client: AI client (creates new if None)
        """
        self.session = session
        self.config = config or get_config()
        self.ai_client = ai_client or AIClient(self.config)
        self.content_repo = GeneratedContentRepository(session)
        self.rule_engine = ContentRuleEngine(self.config)
        self.validator = StageValidator(self.config, self.rule_engine)
        self.augmenter = ContentAugmenter()
        # Prompt templates are JSON files stored next to this module.
        self.prompts_dir = Path(__file__).parent / "prompts"

    def generate_article(
        self,
        project: Project,
        tier: int,
        title_model: str,
        outline_model: str,
        content_model: str,
        max_retries: int = 3
    ) -> GeneratedContent:
        """
        Generate complete article through three-stage pipeline

        Args:
            project: Project with CORA data
            tier: Tier level
            title_model: Model for title generation
            outline_model: Model for outline generation
            content_model: Model for content generation
            max_retries: Max retry attempts per stage

        Returns:
            GeneratedContent record with completed article

        Raises:
            GenerationError: If generation fails after all retries. The
                record is marked "failed" and persisted before raising.
        """
        start_time = time.time()

        content_record = self.content_repo.create(project.id, tier)
        content_record.title_model = title_model
        content_record.outline_model = outline_model
        content_record.content_model = content_model
        self.content_repo.update(content_record)

        try:
            title = self._generate_title(project, content_record, title_model, max_retries)

            content_record.generation_stage = "outline"
            self.content_repo.update(content_record)
            outline = self._generate_outline(project, title, content_record, outline_model, max_retries)

            content_record.generation_stage = "content"
            self.content_repo.update(content_record)
            # The HTML is stored on content_record by the stage itself, so the
            # return value is not needed here.
            self._generate_content(
                project, title, outline, content_record, content_model, max_retries
            )

            content_record.status = "completed"
            content_record.generation_duration = time.time() - start_time
            self.content_repo.update(content_record)
            return content_record
        except Exception as e:
            # Persist the failure before surfacing it so the record reflects
            # what happened even when the caller swallows the error.
            content_record.status = "failed"
            content_record.error_message = str(e)
            content_record.generation_duration = time.time() - start_time
            self.content_repo.update(content_record)
            raise GenerationError(f"Article generation failed: {e}") from e

    def _generate_title(
        self,
        project: Project,
        content_record: GeneratedContent,
        model: str,
        max_retries: int
    ) -> str:
        """Generate and validate title.

        Returns the first title that passes validation. Raises GenerationError
        when the AI client fails on the last attempt or when no attempt
        produced a valid title.
        """
        prompt_template = self._load_prompt("title_generation.json")

        entities_str = ", ".join(project.entities[:10]) if project.entities else "N/A"
        searches_str = ", ".join(project.related_searches[:10]) if project.related_searches else "N/A"

        prompt = prompt_template["user_template"].format(
            main_keyword=project.main_keyword,
            word_count=project.word_count,
            entities=entities_str,
            related_searches=searches_str
        )

        for attempt in range(1, max_retries + 1):
            content_record.title_attempts = attempt
            self.content_repo.update(content_record)

            try:
                title = self.ai_client.generate(
                    prompt=prompt,
                    model=model,
                    temperature=0.7
                )

                is_valid, errors = self.validator.validate_title(title, project)
                if is_valid:
                    content_record.title = title
                    self.content_repo.update(content_record)
                    return title

                if attempt < max_retries:
                    # Feed the validation errors back to the model.
                    prompt += f"\n\nPrevious attempt failed: {', '.join(errors)}. Please fix these issues."
            except AIClientError as e:
                # Client errors on earlier attempts are retried silently.
                if attempt == max_retries:
                    raise GenerationError(
                        f"Title generation failed after {max_retries} attempts: {e}"
                    ) from e

        raise GenerationError(f"Title validation failed after {max_retries} attempts")

    @staticmethod
    def _int_or_default(value: Any, default: int) -> int:
        """Return int(value) for a truthy value, otherwise default (None and 0 fall back)."""
        return int(value) if value else default

    def _generate_outline(
        self,
        project: Project,
        title: str,
        content_record: GeneratedContent,
        model: str,
        max_retries: int
    ) -> Dict[str, Any]:
        """Generate and validate outline.

        Returns the first outline (model output or augmented version) that
        passes validation. Raises GenerationError when the AI client or JSON
        parsing fails on the last attempt, or when no attempt validated.
        """
        prompt_template = self._load_prompt("outline_generation.json")

        entities_str = ", ".join(project.entities[:20]) if project.entities else "N/A"
        searches_str = ", ".join(project.related_searches[:20]) if project.related_searches else "N/A"

        # NOTE(review): targets are always truncated by int();
        # config.content_rules.cora_validation.round_averages_down is not
        # consulted here because re-casting an int was a no-op. Confirm whether
        # round-to-nearest is wanted when that flag is off.
        to_int = self._int_or_default
        h2_total = to_int(project.h2_total, 5)
        h2_exact = to_int(project.h2_exact, 1)
        h2_related = to_int(project.h2_related_search, 1)
        h2_entities = to_int(project.h2_entities, 2)
        h3_total = to_int(project.h3_total, 10)
        h3_exact = to_int(project.h3_exact, 1)
        h3_related = to_int(project.h3_related_search, 2)
        h3_entities = to_int(project.h3_entities, 3)

        prompt = prompt_template["user_template"].format(
            title=title,
            main_keyword=project.main_keyword,
            word_count=project.word_count,
            h2_total=h2_total,
            h2_exact=h2_exact,
            h2_related_search=h2_related,
            h2_entities=h2_entities,
            h3_total=h3_total,
            h3_exact=h3_exact,
            h3_related_search=h3_related,
            h3_entities=h3_entities,
            entities=entities_str,
            related_searches=searches_str
        )

        for attempt in range(1, max_retries + 1):
            content_record.outline_attempts = attempt
            self.content_repo.update(content_record)

            try:
                raw_outline = self.ai_client.generate_json(
                    prompt=prompt,
                    model=model,
                    temperature=0.7,
                    max_tokens=2000
                )
                # The client result is parsed only when it is still a string.
                if isinstance(raw_outline, str):
                    outline = json.loads(raw_outline)
                else:
                    outline = raw_outline

                is_valid, errors, missing = self.validator.validate_outline(outline, project)
                if is_valid:
                    content_record.outline = json.dumps(outline)
                    self.content_repo.update(content_record)
                    return outline

                if attempt < max_retries:
                    if missing:
                        # Try to patch the outline before spending another
                        # model call.
                        augmented_outline, aug_log = self.augmenter.augment_outline(
                            outline, missing, project.main_keyword,
                            project.entities or [], project.related_searches or []
                        )
                        is_valid_aug, _, _ = self.validator.validate_outline(
                            augmented_outline, project
                        )
                        if is_valid_aug:
                            content_record.outline = json.dumps(augmented_outline)
                            content_record.augmented = True
                            content_record.augmentation_log = aug_log
                            self.content_repo.update(content_record)
                            return augmented_outline

                    prompt += f"\n\nPrevious attempt failed: {', '.join(errors)}. Please meet ALL CORA targets exactly."
            except (AIClientError, json.JSONDecodeError) as e:
                # Client/parse errors on earlier attempts are retried silently.
                if attempt == max_retries:
                    raise GenerationError(
                        f"Outline generation failed after {max_retries} attempts: {e}"
                    ) from e

        raise GenerationError(f"Outline validation failed after {max_retries} attempts")

    def _generate_content(
        self,
        project: Project,
        title: str,
        outline: Dict[str, Any],
        content_record: GeneratedContent,
        model: str,
        max_retries: int
    ) -> str:
        """Generate and validate full HTML content.

        Returns the first HTML (model output or augmented version) that passes
        validation and stores it on content_record. The stored word_count is
        the number of whitespace-separated tokens of the raw HTML, so markup
        tokens are included. Raises GenerationError when the AI client fails
        on the last attempt or when no attempt validated.
        """
        prompt_template = self._load_prompt("content_generation.json")

        outline_str = self._format_outline_for_prompt(outline)
        entities_str = ", ".join(project.entities[:30]) if project.entities else "N/A"
        searches_str = ", ".join(project.related_searches[:30]) if project.related_searches else "N/A"

        # Fall back to the configured default so the prompt asks for the same
        # term frequency that the validator checks against.
        term_frequency = (
            project.term_frequency
            or self.config.content_rules.universal.default_term_frequency
        )

        prompt = prompt_template["user_template"].format(
            outline=outline_str,
            title=title,
            main_keyword=project.main_keyword,
            word_count=project.word_count,
            term_frequency=term_frequency,
            entities=entities_str,
            related_searches=searches_str
        )

        for attempt in range(1, max_retries + 1):
            content_record.content_attempts = attempt
            self.content_repo.update(content_record)

            try:
                html_content = self.ai_client.generate(
                    prompt=prompt,
                    model=model,
                    temperature=0.7,
                    max_tokens=self.config.ai_service.max_tokens
                )

                is_valid, validation_result = self.validator.validate_content(html_content, project)

                # Record the validation outcome of every attempt.
                content_record.validation_errors = len(validation_result.errors)
                content_record.validation_warnings = len(validation_result.warnings)
                content_record.validation_report = validation_result.to_dict()
                self.content_repo.update(content_record)

                if is_valid:
                    content_record.content = html_content
                    content_record.word_count = len(html_content.split())
                    self.content_repo.update(content_record)
                    return html_content

                if attempt < max_retries:
                    # extract_missing_elements needs the HTML to compute the
                    # word-count deficit and keyword target.
                    missing = self.validator.extract_missing_elements(
                        validation_result, project, html_content
                    )

                    if missing and any(missing.values()):
                        augmented_html, aug_log = self.augmenter.augment_content(
                            html_content, missing, project.main_keyword,
                            project.entities or [], project.related_searches or []
                        )
                        is_valid_aug, validation_result_aug = self.validator.validate_content(
                            augmented_html, project
                        )

                        if is_valid_aug:
                            content_record.content = augmented_html
                            content_record.augmented = True
                            # Keep any log written by the outline stage.
                            existing_log = content_record.augmentation_log or {}
                            existing_log["content_augmentation"] = aug_log
                            content_record.augmentation_log = existing_log
                            content_record.validation_errors = len(validation_result_aug.errors)
                            content_record.validation_warnings = len(validation_result_aug.warnings)
                            content_record.validation_report = validation_result_aug.to_dict()
                            content_record.word_count = len(augmented_html.split())
                            self.content_repo.update(content_record)
                            return augmented_html

                    # Only the first five errors are fed back to the model.
                    error_summary = ", ".join([e.message for e in validation_result.errors[:5]])
                    prompt += f"\n\nPrevious content failed validation: {error_summary}. Please fix these issues."
            except AIClientError as e:
                # Client errors on earlier attempts are retried silently.
                if attempt == max_retries:
                    raise GenerationError(
                        f"Content generation failed after {max_retries} attempts: {e}"
                    ) from e

        raise GenerationError(f"Content validation failed after {max_retries} attempts")

    def _load_prompt(self, filename: str) -> Dict[str, Any]:
        """Load prompt template from JSON file.

        Raises GenerationError when the file does not exist in prompts_dir.
        """
        prompt_path = self.prompts_dir / filename
        if not prompt_path.exists():
            raise GenerationError(f"Prompt template not found: {filename}")

        with open(prompt_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _format_outline_for_prompt(self, outline: Dict[str, Any]) -> str:
        """Format outline JSON into readable string for content prompt.

        Missing or empty "h1", "sections", "h2" and "h3s" entries are rendered
        as empty rather than raising.
        """
        lines = [f"H1: {outline.get('h1', '')}"]

        for section in outline.get("sections") or []:
            heading = section.get("h2", "")
            lines.append(f"\nH2: {heading}")
            for h3 in section.get("h3s") or []:
                lines.append(f" H3: {h3}")

        return "\n".join(lines)

View File

@ -102,7 +102,7 @@ class TestProjectRepositoryIntegration:
assert project.main_keyword == "test keyword"
assert project.user_id == user.id
assert project.word_count == 1500
assert project.term_frequency == 3
assert project.term_frequency == 2
assert len(project.entities) == 3
assert len(project.related_searches) == 2

View File

@ -30,7 +30,7 @@ def mock_project():
project.id = 1
project.main_keyword = "test keyword"
project.word_count = 1000
project.term_frequency = 3
project.term_frequency = 2
project.tier = 1
project.h2_total = 5
project.h2_exact = 1