""" Unit tests for content rule engine """ import pytest from unittest.mock import Mock from src.generation.rule_engine import ( ContentRuleEngine, ContentHTMLParser, ValidationResult, ValidationIssue ) from src.database.models import Project from src.core.config import Config @pytest.fixture def mock_config(): """Mock configuration for tests""" config = Mock() config.get = Mock(side_effect=lambda key, default={}: { "content_rules.universal": { "min_content_length": 1000, "max_content_length": 5000, "title_exact_match_required": True, "h1_exact_match_required": True, "h2_exact_match_min": 1, "h3_exact_match_min": 1, "faq_section_required": True, "image_alt_text_keyword_required": True, "image_alt_text_entity_required": True }, "content_rules.cora_validation": { "enabled": True, "tier_1_strict": True, "tier_2_plus_warn_only": True, "round_averages_down": True } }.get(key, default)) return config @pytest.fixture def sample_project(): """Sample project with CORA data""" project = Mock(spec=Project) project.id = 1 project.main_keyword = "shaft machining" project.tier = 1 project.entities = ["CNC", "lathe", "precision"] project.related_searches = ["shaft machining process", "machining techniques"] project.h1_exact = 1 project.h1_related_search = 0 project.h1_entities = 1 project.h2_total = 5 project.h2_exact = 1 project.h2_related_search = 2 project.h2_entities = 2 project.h3_total = 8 project.h3_exact = 1 project.h3_related_search = 3 project.h3_entities = 3 return project class TestContentHTMLParser: """Tests for HTML parser""" def test_parse_title(self): html = "Shaft Machining Guide" parser = ContentHTMLParser() parser.feed(html) assert parser.title == "Shaft Machining Guide" def test_parse_meta_description(self): html = '' parser = ContentHTMLParser() parser.feed(html) assert parser.meta_description == "Complete guide to shaft machining" def test_parse_headings(self): html = """

Main Heading about Shaft Machining

Understanding CNC

Shaft Machining Process

What is a lathe?

Precision techniques

FAQ about shaft machining

""" parser = ContentHTMLParser() parser.feed(html) assert len(parser.h1_tags) == 1 assert "Shaft Machining" in parser.h1_tags[0] assert len(parser.h2_tags) == 2 assert len(parser.h3_tags) == 3 def test_parse_images(self): html = """ Shaft machining with CNC lathe Precision tools """ parser = ContentHTMLParser() parser.feed(html) assert len(parser.images) == 2 assert parser.images[0]["alt"] == "Shaft machining with CNC lathe" assert parser.images[1]["alt"] == "Precision tools" def test_parse_links(self): html = """ Home Page Related Article """ parser = ContentHTMLParser() parser.feed(html) assert len(parser.links) == 2 assert parser.links[0]["href"] == "/home" assert "Home Page" in parser.links[0]["text"] def test_parse_text_content(self): html = """

Title

This is some content about shaft machining and CNC operations.

More content here with precision lathe work.

""" parser = ContentHTMLParser() parser.feed(html) assert "shaft machining" in parser.text_content.lower() assert "CNC" in parser.text_content assert len(parser.text_content.split()) > 10 class TestValidationResult: """Tests for ValidationResult class""" def test_initial_state(self): result = ValidationResult(passed=True) assert result.passed is True assert len(result.errors) == 0 assert len(result.warnings) == 0 def test_add_error(self): result = ValidationResult(passed=True) result.add_error("test_rule", "Test error", expected=5, actual=3) assert result.passed is False assert len(result.errors) == 1 assert result.errors[0].rule_name == "test_rule" assert result.errors[0].severity == "error" def test_add_warning(self): result = ValidationResult(passed=True) result.add_warning("test_rule", "Test warning", expected=5, actual=4) assert result.passed is True assert len(result.warnings) == 1 assert result.warnings[0].severity == "warning" def test_to_dict(self): result = ValidationResult(passed=False) result.add_error("rule1", "Error message", expected=5, actual=3) result.add_warning("rule2", "Warning message", expected=10, actual=8) data = result.to_dict() assert data["passed"] is False assert len(data["errors"]) == 1 assert len(data["warnings"]) == 1 assert data["errors"][0]["rule"] == "rule1" assert data["warnings"][0]["rule"] == "rule2" class TestUniversalRules: """Tests for universal rule validation""" def test_content_length_validation(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) short_html = "

Shaft machining

Short content.

" result = engine.validate(short_html, sample_project) assert not result.passed assert any("too short" in e.message for e in result.errors) def test_title_keyword_required(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html_without_keyword = "Generic Title" + "word " * 1500 + "" result = engine.validate(html_without_keyword, sample_project) assert any("title" in e.rule_name.lower() for e in result.errors) def test_h1_keyword_required(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """ Shaft Machining Guide

Generic Heading

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) assert any("h1" in e.rule_name.lower() for e in result.errors) def test_h2_keyword_minimum(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """ Shaft Machining Guide

Shaft Machining Basics

Generic Topic

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) assert any("h2_exact_match_min" in e.rule_name for e in result.errors) def test_faq_section_required(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """ Shaft Machining Guide

Shaft Machining Basics

Shaft Machining Process

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) assert any("faq" in e.rule_name.lower() for e in result.errors) def test_image_alt_text_validation(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """ Shaft Machining Guide

Shaft Machining Basics

FAQ about shaft machining

Shaft Machining Techniques

What is shaft machining?

Generic image

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) assert any("image_alt_text" in e.rule_name for e in result.errors) class TestCORAValidation: """Tests for CORA-specific validation""" def test_tier_1_strict_validation(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) sample_project.tier = 1 sample_project.h2_total = 5 html = """ Shaft Machining Guide

Shaft Machining with CNC

Shaft Machining Process

Understanding CNC

What is shaft machining?

FAQ

Shaft machining with CNC lathe

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) h2_errors = [e for e in result.errors if "h2_total" in e.rule_name] assert len(h2_errors) > 0 assert h2_errors[0].expected == 5 assert h2_errors[0].actual == 2 def test_tier_2_warning_only(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) sample_project.tier = 2 sample_project.h2_total = 5 html = """ Shaft Machining Guide

Shaft Machining with CNC

Shaft Machining Process

Understanding CNC

What is shaft machining?

FAQ

Shaft machining with CNC lathe

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) h2_warnings = [w for w in result.warnings if "h2_total" in w.rule_name] assert len(h2_warnings) > 0 h2_errors = [e for e in result.errors if "h2_total" in e.rule_name] assert len(h2_errors) == 0 def test_keyword_entity_counting(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """ Shaft Machining Guide

Shaft Machining Basics

Shaft Machining Process

Understanding CNC Operations

Working with Precision Lathe

What is shaft machining?

CNC Techniques

FAQ

Shaft machining with CNC

""" + "word " * 1500 + """

""" parser = ContentHTMLParser() parser.feed(html) counts = engine._count_keyword_entities(parser, sample_project) assert counts["h1_exact"] == 1 assert counts["h2_exact"] == 1 assert counts["h2_entities"] >= 2 assert counts["h3_exact"] == 1 def test_round_averages_down(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) sample_project.h2_total = 5.6 html = """ Shaft Machining Guide

Shaft Machining with CNC

Shaft Machining Process

Understanding CNC

Lathe Operations

Precision Work

Best Practices

What is shaft machining?

FAQ

Shaft machining with CNC

""" + "word " * 1500 + """

""" result = engine.validate(html, sample_project) h2_issues = [e for e in result.errors if "h2_total" in e.rule_name] if h2_issues: assert h2_issues[0].expected == 5 class TestValidContent: """Tests for content that should pass validation""" def test_fully_compliant_content(self, mock_config, sample_project): engine = ContentRuleEngine(mock_config) html = """ Complete Guide to Shaft Machining

Shaft Machining: CNC Operations

Shaft Machining Process Explained

Content about the main process...

Understanding CNC Technology

More content...

Working with Precision Lathe

Additional information...

Shaft Machining Techniques

Techniques details...

Best Practices in CNC

Best practices...

What is shaft machining?

Definition and explanation...

CNC Lathe Operations

Operations details...

Precision Techniques

Techniques information...

Shaft Machining Process Guide

Process details...

Understanding Machining Techniques

Techniques overview...

CNC Setup and Shaft Machining Process

Setup instructions...

Lathe Maintenance for Machining Techniques

Maintenance tips...

FAQ: Common Questions about Shaft Machining

Frequently asked questions...

Shaft machining with CNC lathe Precision shaft machining setup

""" + " ".join(["shaft machining process details and information"] * 250) + """

""" result = engine.validate(html, sample_project) assert result.passed is True assert len(result.errors) == 0