452 lines
15 KiB
Python
452 lines
15 KiB
Python
"""
|
|
Unit tests for content rule engine
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import Mock
|
|
from src.generation.rule_engine import (
|
|
ContentRuleEngine,
|
|
ContentHTMLParser,
|
|
ValidationResult,
|
|
ValidationIssue
|
|
)
|
|
from src.database.models import Project
|
|
from src.core.config import Config
|
|
|
|
|
|
@pytest.fixture
def mock_config():
    """Provide a stand-in configuration object for the rule-engine tests.

    Returns:
        A ``Mock`` whose ``get(key, default)`` looks ``key`` up in a fixed
        table holding the ``content_rules.universal`` and
        ``content_rules.cora_validation`` sections. Unknown keys yield
        ``default``; when ``default`` is omitted an empty dict is returned.
    """
    # Sentinel that tells "no default supplied" apart from an explicit value.
    # A literal ``{}`` default would be one dict object shared by every call.
    missing = object()

    def lookup(key, default=missing):
        # Rebuilt on every call so callers never share the section dicts.
        sections = {
            "content_rules.universal": {
                "min_content_length": 1000,
                "max_content_length": 5000,
                "title_exact_match_required": True,
                "h1_exact_match_required": True,
                "h2_exact_match_min": 1,
                "h3_exact_match_min": 1,
                "faq_section_required": True,
                "image_alt_text_keyword_required": True,
                "image_alt_text_entity_required": True,
            },
            "content_rules.cora_validation": {
                "enabled": True,
                "tier_1_strict": True,
                "tier_2_plus_warn_only": True,
                "round_averages_down": True,
            },
        }
        if default is missing:
            default = {}
        return sections.get(key, default)

    config = Mock()
    config.get = Mock(side_effect=lookup)
    return config
|
|
|
|
|
|
@pytest.fixture
def sample_project():
    """Build a mocked ``Project`` carrying keyword, entity and heading targets.

    Returns:
        A ``Mock(spec=Project)`` for tier 1 with main keyword
        "shaft machining" and the H1/H2/H3 count attributes set below.
    """
    attributes = {
        "id": 1,
        "main_keyword": "shaft machining",
        "tier": 1,
        "entities": ["CNC", "lathe", "precision"],
        "related_searches": ["shaft machining process", "machining techniques"],
        # H1 targets
        "h1_exact": 1,
        "h1_related_search": 0,
        "h1_entities": 1,
        # H2 targets
        "h2_total": 5,
        "h2_exact": 1,
        "h2_related_search": 2,
        "h2_entities": 2,
        # H3 targets
        "h3_total": 8,
        "h3_exact": 1,
        "h3_related_search": 3,
        "h3_entities": 3,
    }

    mocked_project = Mock(spec=Project)
    for attr_name, attr_value in attributes.items():
        setattr(mocked_project, attr_name, attr_value)
    return mocked_project
|
|
|
|
|
|
class TestContentHTMLParser:
    """Checks on what ContentHTMLParser collects from a fed document."""

    @staticmethod
    def _parse(markup):
        """Feed ``markup`` to a fresh ContentHTMLParser and return the parser."""
        html_parser = ContentHTMLParser()
        html_parser.feed(markup)
        return html_parser

    def test_parse_title(self):
        """The <title> text is exposed as ``title``."""
        markup = "<html><head><title>Shaft Machining Guide</title></head></html>"
        parsed = self._parse(markup)
        assert parsed.title == "Shaft Machining Guide"

    def test_parse_meta_description(self):
        """The description meta tag's content is exposed as ``meta_description``."""
        markup = '<html><head><meta name="description" content="Complete guide to shaft machining"></head></html>'
        parsed = self._parse(markup)
        assert parsed.meta_description == "Complete guide to shaft machining"

    def test_parse_headings(self):
        """H1, H2 and H3 headings are gathered into their own lists."""
        markup = """
        <html><body>
            <h1>Main Heading about Shaft Machining</h1>
            <h2>Understanding CNC</h2>
            <h2>Shaft Machining Process</h2>
            <h3>What is a lathe?</h3>
            <h3>Precision techniques</h3>
            <h3>FAQ about shaft machining</h3>
        </body></html>
        """
        parsed = self._parse(markup)

        assert len(parsed.h1_tags) == 1
        assert "Shaft Machining" in parsed.h1_tags[0]
        assert len(parsed.h2_tags) == 2
        assert len(parsed.h3_tags) == 3

    def test_parse_images(self):
        """Each <img> is recorded together with its alt text."""
        markup = """
        <html><body>
            <img src="image1.jpg" alt="Shaft machining with CNC lathe">
            <img src="image2.jpg" alt="Precision tools">
        </body></html>
        """
        parsed = self._parse(markup)

        assert len(parsed.images) == 2
        first_image = parsed.images[0]
        second_image = parsed.images[1]
        assert first_image["alt"] == "Shaft machining with CNC lathe"
        assert second_image["alt"] == "Precision tools"

    def test_parse_links(self):
        """Each <a> is recorded with its href and link text."""
        markup = """
        <html><body>
            <a href="/home">Home Page</a>
            <a href="/article">Related Article</a>
        </body></html>
        """
        parsed = self._parse(markup)

        assert len(parsed.links) == 2
        home_link = parsed.links[0]
        assert home_link["href"] == "/home"
        assert "Home Page" in home_link["text"]

    def test_parse_text_content(self):
        """Visible text is accumulated in ``text_content``."""
        markup = """
        <html><body>
            <h1>Title</h1>
            <p>This is some content about shaft machining and CNC operations.</p>
            <p>More content here with precision lathe work.</p>
        </body></html>
        """
        extracted_text = self._parse(markup).text_content

        assert "shaft machining" in extracted_text.lower()
        assert "CNC" in extracted_text
        assert len(extracted_text.split()) > 10
|
|
|
|
|
|
class TestValidationResult:
    """Checks on ValidationResult bookkeeping and serialisation."""

    def test_initial_state(self):
        """A new result keeps the given ``passed`` flag and holds no issues."""
        fresh = ValidationResult(passed=True)

        assert fresh.passed is True
        assert len(fresh.errors) == 0
        assert len(fresh.warnings) == 0

    def test_add_error(self):
        """Recording an error flips ``passed`` to False and stores the issue."""
        outcome = ValidationResult(passed=True)
        outcome.add_error("test_rule", "Test error", expected=5, actual=3)

        assert outcome.passed is False
        assert len(outcome.errors) == 1
        recorded = outcome.errors[0]
        assert recorded.rule_name == "test_rule"
        assert recorded.severity == "error"

    def test_add_warning(self):
        """Recording a warning leaves ``passed`` untouched."""
        outcome = ValidationResult(passed=True)
        outcome.add_warning("test_rule", "Test warning", expected=5, actual=4)

        assert outcome.passed is True
        assert len(outcome.warnings) == 1
        recorded = outcome.warnings[0]
        assert recorded.severity == "warning"

    def test_to_dict(self):
        """``to_dict`` exposes the flag plus error and warning entries."""
        outcome = ValidationResult(passed=False)
        outcome.add_error("rule1", "Error message", expected=5, actual=3)
        outcome.add_warning("rule2", "Warning message", expected=10, actual=8)

        serialised = outcome.to_dict()

        assert serialised["passed"] is False
        assert len(serialised["errors"]) == 1
        assert len(serialised["warnings"]) == 1
        assert serialised["errors"][0]["rule"] == "rule1"
        assert serialised["warnings"][0]["rule"] == "rule2"
|
|
|
|
|
|
class TestUniversalRules:
    """Checks that each universal content rule reports a violation."""

    def test_content_length_validation(self, mock_config, sample_project):
        """A page with only a few words must fail with a "too short" error."""
        rule_engine = ContentRuleEngine(mock_config)
        tiny_page = "<html><body><h1>Shaft machining</h1><p>Short content.</p></body></html>"

        outcome = rule_engine.validate(tiny_page, sample_project)

        assert not outcome.passed
        messages = [issue.message for issue in outcome.errors]
        assert any("too short" in message for message in messages)

    def test_title_keyword_required(self, mock_config, sample_project):
        """A title lacking the main keyword must raise a title rule error."""
        rule_engine = ContentRuleEngine(mock_config)
        padding = "word " * 1500
        page = f"<html><head><title>Generic Title</title></head><body>{padding}</body></html>"

        outcome = rule_engine.validate(page, sample_project)

        rule_names = [issue.rule_name.lower() for issue in outcome.errors]
        assert any("title" in rule_name for rule_name in rule_names)

    def test_h1_keyword_required(self, mock_config, sample_project):
        """An H1 lacking the main keyword must raise an h1 rule error."""
        rule_engine = ContentRuleEngine(mock_config)
        padding = "word " * 1500
        page = f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Generic Heading</h1>
            <p>{padding}</p>
        </body>
        </html>
        """

        outcome = rule_engine.validate(page, sample_project)

        rule_names = [issue.rule_name.lower() for issue in outcome.errors]
        assert any("h1" in rule_name for rule_name in rule_names)

    def test_h2_keyword_minimum(self, mock_config, sample_project):
        """No H2 with the main keyword must raise ``h2_exact_match_min``."""
        rule_engine = ContentRuleEngine(mock_config)
        padding = "word " * 1500
        page = f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Shaft Machining Basics</h1>
            <h2>Generic Topic</h2>
            <p>{padding}</p>
        </body>
        </html>
        """

        outcome = rule_engine.validate(page, sample_project)

        rule_names = [issue.rule_name for issue in outcome.errors]
        assert any("h2_exact_match_min" in rule_name for rule_name in rule_names)

    def test_faq_section_required(self, mock_config, sample_project):
        """A page without any FAQ heading must raise a faq rule error."""
        rule_engine = ContentRuleEngine(mock_config)
        padding = "word " * 1500
        page = f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Shaft Machining Basics</h1>
            <h2>Shaft Machining Process</h2>
            <p>{padding}</p>
        </body>
        </html>
        """

        outcome = rule_engine.validate(page, sample_project)

        rule_names = [issue.rule_name.lower() for issue in outcome.errors]
        assert any("faq" in rule_name for rule_name in rule_names)

    def test_image_alt_text_validation(self, mock_config, sample_project):
        """A generic image alt text must raise an ``image_alt_text`` error."""
        rule_engine = ContentRuleEngine(mock_config)
        padding = "word " * 1500
        page = f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Shaft Machining Basics</h1>
            <h2>FAQ about shaft machining</h2>
            <h2>Shaft Machining Techniques</h2>
            <h3>What is shaft machining?</h3>
            <img src="test.jpg" alt="Generic image">
            <p>{padding}</p>
        </body>
        </html>
        """

        outcome = rule_engine.validate(page, sample_project)

        rule_names = [issue.rule_name for issue in outcome.errors]
        assert any("image_alt_text" in rule_name for rule_name in rule_names)
|
|
|
|
|
|
class TestCORAValidation:
    """Checks on the CORA heading-count validation and its tier handling."""

    @staticmethod
    def _page_with_two_h2():
        """Return a padded page with two H2 headings, two H3 headings and one image."""
        padding = "word " * 1500
        return f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Shaft Machining with CNC</h1>
            <h2>Shaft Machining Process</h2>
            <h2>Understanding CNC</h2>
            <h3>What is shaft machining?</h3>
            <h3>FAQ</h3>
            <img src="test.jpg" alt="Shaft machining with CNC lathe">
            <p>{padding}</p>
        </body>
        </html>
        """

    def test_tier_1_strict_validation(self, mock_config, sample_project):
        """Tier 1: two H2s against a target of five must be an ``h2_total`` error."""
        rule_engine = ContentRuleEngine(mock_config)
        sample_project.tier = 1
        sample_project.h2_total = 5

        outcome = rule_engine.validate(self._page_with_two_h2(), sample_project)

        h2_total_errors = [
            issue for issue in outcome.errors if "h2_total" in issue.rule_name
        ]
        assert len(h2_total_errors) > 0
        first_error = h2_total_errors[0]
        assert first_error.expected == 5
        assert first_error.actual == 2

    def test_tier_2_warning_only(self, mock_config, sample_project):
        """Tier 2: the same shortfall must be a warning and not an error."""
        rule_engine = ContentRuleEngine(mock_config)
        sample_project.tier = 2
        sample_project.h2_total = 5

        outcome = rule_engine.validate(self._page_with_two_h2(), sample_project)

        h2_total_warnings = [
            issue for issue in outcome.warnings if "h2_total" in issue.rule_name
        ]
        assert len(h2_total_warnings) > 0

        h2_total_errors = [
            issue for issue in outcome.errors if "h2_total" in issue.rule_name
        ]
        assert len(h2_total_errors) == 0

    def test_keyword_entity_counting(self, mock_config, sample_project):
        """``_count_keyword_entities`` reports the expected per-heading counts."""
        rule_engine = ContentRuleEngine(mock_config)
        padding = "word " * 1500
        page = f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Shaft Machining Basics</h1>
            <h2>Shaft Machining Process</h2>
            <h2>Understanding CNC Operations</h2>
            <h2>Working with Precision Lathe</h2>
            <h3>What is shaft machining?</h3>
            <h3>CNC Techniques</h3>
            <h3>FAQ</h3>
            <img src="test.jpg" alt="Shaft machining with CNC">
            <p>{padding}</p>
        </body>
        </html>
        """
        html_parser = ContentHTMLParser()
        html_parser.feed(page)

        tallies = rule_engine._count_keyword_entities(html_parser, sample_project)

        assert tallies["h1_exact"] == 1
        assert tallies["h2_exact"] == 1
        assert tallies["h2_entities"] >= 2
        assert tallies["h3_exact"] == 1

    def test_round_averages_down(self, mock_config, sample_project):
        """A fractional ``h2_total`` of 5.6 must be reported as 5, if reported."""
        rule_engine = ContentRuleEngine(mock_config)
        sample_project.h2_total = 5.6
        padding = "word " * 1500
        page = f"""
        <html>
        <head><title>Shaft Machining Guide</title></head>
        <body>
            <h1>Shaft Machining with CNC</h1>
            <h2>Shaft Machining Process</h2>
            <h2>Understanding CNC</h2>
            <h2>Lathe Operations</h2>
            <h2>Precision Work</h2>
            <h2>Best Practices</h2>
            <h3>What is shaft machining?</h3>
            <h3>FAQ</h3>
            <img src="test.jpg" alt="Shaft machining with CNC">
            <p>{padding}</p>
        </body>
        </html>
        """

        outcome = rule_engine.validate(page, sample_project)

        h2_total_errors = [
            issue for issue in outcome.errors if "h2_total" in issue.rule_name
        ]
        # NOTE(review): this passes without checking anything when no h2_total
        # error is produced; only the first such error is inspected.
        assert not h2_total_errors or h2_total_errors[0].expected == 5
|
|
|
|
|
|
class TestValidContent:
    """Checks that content meeting every rule is accepted."""

    def test_fully_compliant_content(self, mock_config, sample_project):
        """A page built to meet the sample project's targets yields no errors."""
        rule_engine = ContentRuleEngine(mock_config)
        body_copy = " ".join(["shaft machining process details and information"] * 250)
        page = f"""
        <html>
        <head><title>Complete Guide to Shaft Machining</title></head>
        <body>
            <h1>Shaft Machining: CNC Operations</h1>

            <h2>Shaft Machining Process Explained</h2>
            <p>Content about the main process...</p>

            <h2>Understanding CNC Technology</h2>
            <p>More content...</p>

            <h2>Working with Precision Lathe</h2>
            <p>Additional information...</p>

            <h2>Shaft Machining Techniques</h2>
            <p>Techniques details...</p>

            <h2>Best Practices in CNC</h2>
            <p>Best practices...</p>

            <h3>What is shaft machining?</h3>
            <p>Definition and explanation...</p>

            <h3>CNC Lathe Operations</h3>
            <p>Operations details...</p>

            <h3>Precision Techniques</h3>
            <p>Techniques information...</p>

            <h3>Shaft Machining Process Guide</h3>
            <p>Process details...</p>

            <h3>Understanding Machining Techniques</h3>
            <p>Techniques overview...</p>

            <h3>CNC Setup and Shaft Machining Process</h3>
            <p>Setup instructions...</p>

            <h3>Lathe Maintenance for Machining Techniques</h3>
            <p>Maintenance tips...</p>

            <h3>FAQ: Common Questions about Shaft Machining</h3>
            <p>Frequently asked questions...</p>

            <img src="image1.jpg" alt="Shaft machining with CNC lathe">
            <img src="image2.jpg" alt="Precision shaft machining setup">

            <p>{body_copy}</p>
        </body>
        </html>
        """

        outcome = rule_engine.validate(page, sample_project)

        assert outcome.passed is True
        assert len(outcome.errors) == 0
|
|
|