""" Unit tests for content rule engine """ import pytest from unittest.mock import Mock from src.generation.rule_engine import ( ContentRuleEngine, ContentHTMLParser, ValidationResult, ValidationIssue ) from src.database.models import Project from src.core.config import Config @pytest.fixture def mock_config(): """Mock configuration for tests""" config = Mock() config.get = Mock(side_effect=lambda key, default={}: { "content_rules.universal": { "min_content_length": 1000, "max_content_length": 5000, "title_exact_match_required": True, "h1_exact_match_required": True, "h2_exact_match_min": 1, "h3_exact_match_min": 1, "faq_section_required": True, "image_alt_text_keyword_required": True, "image_alt_text_entity_required": True }, "content_rules.cora_validation": { "enabled": True, "tier_1_strict": True, "tier_2_plus_warn_only": True, "round_averages_down": True } }.get(key, default)) return config @pytest.fixture def sample_project(): """Sample project with CORA data""" project = Mock(spec=Project) project.id = 1 project.main_keyword = "shaft machining" project.tier = 1 project.entities = ["CNC", "lathe", "precision"] project.related_searches = ["shaft machining process", "machining techniques"] project.h1_exact = 1 project.h1_related_search = 0 project.h1_entities = 1 project.h2_total = 5 project.h2_exact = 1 project.h2_related_search = 2 project.h2_entities = 2 project.h3_total = 8 project.h3_exact = 1 project.h3_related_search = 3 project.h3_entities = 3 return project class TestContentHTMLParser: """Tests for HTML parser""" def test_parse_title(self): html = "
"""
parser = ContentHTMLParser()
parser.feed(html)
assert len(parser.images) == 2
assert parser.images[0]["alt"] == "Shaft machining with CNC lathe"
assert parser.images[1]["alt"] == "Precision tools"
def test_parse_links(self):
html = """
Home Page
Related Article
"""
parser = ContentHTMLParser()
parser.feed(html)
assert len(parser.links) == 2
assert parser.links[0]["href"] == "/home"
assert "Home Page" in parser.links[0]["text"]
def test_parse_text_content(self):
html = """
This is some content about shaft machining and CNC operations.
More content here with precision lathe work.
""" parser = ContentHTMLParser() parser.feed(html) assert "shaft machining" in parser.text_content.lower() assert "CNC" in parser.text_content assert len(parser.text_content.split()) > 10 class TestValidationResult: """Tests for ValidationResult class""" def test_initial_state(self): result = ValidationResult(passed=True) assert result.passed is True assert len(result.errors) == 0 assert len(result.warnings) == 0 def test_add_error(self): result = ValidationResult(passed=True) result.add_error("test_rule", "Test error", expected=5, actual=3) assert result.passed is False assert len(result.errors) == 1 assert result.errors[0].rule_name == "test_rule" assert result.errors[0].severity == "error" def test_add_warning(self): result = ValidationResult(passed=True) result.add_warning("test_rule", "Test warning", expected=5, actual=4) assert result.passed is True assert len(result.warnings) == 1 assert result.warnings[0].severity == "warning" def test_to_dict(self): result = ValidationResult(passed=False) result.add_error("rule1", "Error message", expected=5, actual=3) result.add_warning("rule2", "Warning message", expected=10, actual=8) data = result.to_dict() assert data["passed"] is False assert len(data["errors"]) == 1 assert len(data["warnings"]) == 1 assert data["errors"][0]["rule"] == "rule1" assert data["warnings"][0]["rule"] == "rule2" class TestUniversalRules: """Tests for universal rule validation""" def test_content_length_validation(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) short_html = "Short content.
" result = engine.validate(short_html, sample_project) assert not result.passed assert any("too short" in e.message for e in result.errors) def test_title_keyword_required(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html_without_keyword = """" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) assert any("h1" in e.rule_name.lower() for e in result.errors) def test_h2_keyword_minimum(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """""" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) assert any("h2_exact_match_min" in e.rule_name for e in result.errors) def test_faq_section_required(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """""" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) assert any("faq" in e.rule_name.lower() for e in result.errors) def test_image_alt_text_validation(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """
""" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) assert any("image_alt_text" in e.rule_name for e in result.errors) class TestCORAValidation: """Tests for CORA-specific validation""" def test_tier_1_strict_validation(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) sample_project.tier = 1 sample_project.h2_total = 5 html = """
""" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) h2_errors = [e for e in result.errors if "h2_total" in e.rule_name] assert len(h2_errors) > 0 assert h2_errors[0].expected == 5 assert h2_errors[0].actual == 2 def test_tier_2_warning_only(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) sample_project.tier = 2 sample_project.h2_total = 5 html = """
""" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) h2_warnings = [w for w in result.warnings if "h2_total" in w.rule_name] assert len(h2_warnings) > 0 h2_errors = [e for e in result.errors if "h2_total" in e.rule_name] assert len(h2_errors) == 0 def test_keyword_entity_counting(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """
""" + "word " * 1500 + """
""" parser = ContentHTMLParser() parser.feed(html) counts = engine._count_keyword_entities(parser, sample_project) assert counts["h1_exact"] == 1 assert counts["h2_exact"] == 1 assert counts["h2_entities"] >= 2 assert counts["h3_exact"] == 1 def test_round_averages_down(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) sample_project.h2_total = 5.6 html = """
""" + "word " * 1500 + """
""" result = engine.validate(html, sample_project) h2_issues = [e for e in result.errors if "h2_total" in e.rule_name] if h2_issues: assert h2_issues[0].expected == 5 class TestValidContent: """Tests for content that should pass validation""" def test_fully_compliant_content(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """Content about the main process...
More content...
Additional information...
Techniques details...
Best practices...
Definition and explanation...
Operations details...
Techniques information...
Process details...
Techniques overview...
Setup instructions...
Maintenance tips...
Frequently asked questions...
""" + " ".join(["shaft machining process details and information"] * 250) + """
""" result = engine.validate(html, sample_project) assert result.passed is True assert len(result.errors) == 0