"""
Unit tests for content injection module
"""

import pytest
from unittest.mock import Mock, MagicMock, patch

from src.interlinking.content_injection import (
    inject_interlinks,
    _inject_tiered_links,
    _inject_homepage_link,
    _inject_see_also_section,
    _get_anchor_texts_for_tier,
    _try_inject_link,
    _find_and_wrap_anchor_text,
    _insert_link_into_random_paragraph,
    _extract_homepage_url,
    _insert_before_closing_tags
)
from src.database.models import GeneratedContent, Project

# NOTE(review): this module was recovered from a copy in which all statements
# were collapsed onto a few lines and the HTML markup inside string literals
# had been stripped (fixtures had no tags; link assertions had degraded to
# `'' in result`, which is always true). Markup was restored as `<p>...</p>`
# paragraphs and `<a href="...">` anchors. Confirm the exact anchor format
# against src/interlinking/content_injection.py. Individual spots where whole
# statements had to be reconstructed are marked with NOTE(review) below.


@pytest.fixture
def mock_project():
    """Create a mock Project"""
    project = Mock(spec=Project)
    project.id = 1
    project.main_keyword = "shaft machining"
    project.related_searches = ["cnc shaft machining", "precision shaft machining"]
    project.entities = ["lathe", "milling", "CNC"]
    return project


@pytest.fixture
def mock_content():
    """Create a mock GeneratedContent"""
    content = Mock(spec=GeneratedContent)
    content.id = 1
    content.project_id = 1
    content.tier = "tier1"
    content.title = "Guide to Shaft Machining"
    content.content = (
        "<p>Shaft machining is an important process. "
        "Learn about shaft machining here.</p>"
    )
    return content


@pytest.fixture
def mock_content_repo():
    """Create a mock GeneratedContentRepository"""
    repo = Mock()
    repo.update = Mock(return_value=None)
    return repo


@pytest.fixture
def mock_link_repo():
    """Create a mock ArticleLinkRepository"""
    repo = Mock()
    repo.create = Mock(return_value=None)
    return repo


class TestExtractHomepageUrl:
    """Tests for homepage URL extraction"""

    def test_extract_from_https_url(self):
        url = "https://example.com/article-slug.html"
        result = _extract_homepage_url(url)
        assert result == "https://example.com/"

    def test_extract_from_http_url(self):
        url = "http://example.com/article.html"
        result = _extract_homepage_url(url)
        assert result == "http://example.com/"

    def test_extract_from_cdn_url(self):
        url = "https://site.b-cdn.net/my-article.html"
        result = _extract_homepage_url(url)
        assert result == "https://site.b-cdn.net/"

    def test_extract_from_custom_domain(self):
        url = "https://www.custom.com/path/to/article.html"
        result = _extract_homepage_url(url)
        assert result == "https://www.custom.com/"

    def test_extract_with_port(self):
        url = "https://example.com:8080/article.html"
        result = _extract_homepage_url(url)
        assert result == "https://example.com:8080/"


class TestInsertBeforeClosingTags:
    """Tests for inserting content before closing tags"""

    # NOTE(review): only the first test's `html` fixture and a "Content"
    # paragraph from a later test survived extraction. The inserted snippet,
    # the (html, content) argument order and the assertions below are
    # reconstructions -- verify against _insert_before_closing_tags.

    def test_insert_after_last_paragraph(self):
        html = "<p>First paragraph</p><p>Last paragraph</p>"
        content = "<h3>See Also</h3>"
        result = _insert_before_closing_tags(html, content)
        assert content in result
        # The inserted block must land after the final paragraph's text.
        assert result.index("Last paragraph") < result.index(content)

    def test_insert_with_single_paragraph(self):
        html = "<p>Content</p>"
        content = "<h3>See Also</h3>"
        result = _insert_before_closing_tags(html, content)
        assert "<p>Content</p>" in result
        assert content in result


class TestFindAndWrapAnchorText:
    """Tests for finding anchor text and wrapping it in a link"""

    # NOTE(review): the class header and the first test's name were lost;
    # both are reconstructed from the naming pattern of the sibling classes.

    def test_find_exact_match(self):
        html = "<p>This is about shaft machining processes.</p>"
        anchor = "shaft machining"
        url = "https://example.com"
        result, found = _find_and_wrap_anchor_text(html, anchor, url)
        assert found
        assert f'<a href="{url}">' in result
        assert "shaft machining" in result

    def test_case_insensitive_match(self):
        html = "<p>This is about Shaft Machining processes.</p>"
        anchor = "shaft machining"
        url = "https://example.com"
        result, found = _find_and_wrap_anchor_text(html, anchor, url)
        assert found
        assert f'<a href="{url}">' in result

    def test_match_within_phrase(self):
        html = "<p>The shaft machining process is complex.</p>"
        anchor = "shaft machining"
        url = "https://example.com"
        result, found = _find_and_wrap_anchor_text(html, anchor, url)
        assert found
        assert f'<a href="{url}">' in result

    def test_no_match(self):
        html = "<p>This is about something else.</p>"
        anchor = "shaft machining"
        url = "https://example.com"
        result, found = _find_and_wrap_anchor_text(html, anchor, url)
        assert not found
        assert result == html

    def test_skip_existing_links(self):
        # NOTE(review): the pre-existing link's URL was lost; any URL that
        # differs from `url` exercises the same path.
        html = (
            '<p>Read about <a href="https://other.com">shaft machining</a> here. '
            'Also shaft machining is important.</p>'
        )
        anchor = "shaft machining"
        url = "https://example.com"
        result, found = _find_and_wrap_anchor_text(html, anchor, url)
        assert found
        # Should link the second occurrence, not the one already linked
        assert result.count(f'<a href="{url}">') == 1


class TestInsertLinkIntoRandomParagraph:
    """Tests for inserting link into random paragraph"""

    def test_insert_into_paragraph(self):
        html = (
            "<p>This is a long paragraph with many words and sentences. "
            "It has enough content.</p>"
        )
        anchor = "shaft machining"
        url = "https://example.com"
        result = _insert_link_into_random_paragraph(html, anchor, url)
        assert f'<a href="{url}">{anchor}</a>' in result

    def test_insert_with_multiple_paragraphs(self):
        html = (
            "<p>First paragraph.</p>"
            "<p>Second paragraph with more text.</p>"
            "<p>Third paragraph.</p>"
        )
        anchor = "test link"
        url = "https://example.com"
        result = _insert_link_into_random_paragraph(html, anchor, url)
        assert f'<a href="{url}">{anchor}</a>' in result

    def test_no_valid_paragraphs(self):
        html = "<p>Hi</p><p>Ok</p>"
        anchor = "test"
        url = "https://example.com"
        result = _insert_link_into_random_paragraph(html, anchor, url)
        # Should return original HTML if no valid paragraphs
        assert result == html or f'<a href="{url}">' in result


class TestGetAnchorTextsForTier:
    """Tests for anchor text generation with job config overrides"""

    def test_default_mode(self, mock_project):
        job_config = {"anchor_text_config": {"mode": "default"}}
        with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get:
            mock_get.return_value = ["anchor1", "anchor2"]
            result = _get_anchor_texts_for_tier("tier1", mock_project, job_config)
            assert result == ["anchor1", "anchor2"]

    def test_override_mode(self, mock_project):
        custom = ["custom anchor 1", "custom anchor 2"]
        job_config = {"anchor_text_config": {"mode": "override", "custom_text": custom}}
        result = _get_anchor_texts_for_tier("tier1", mock_project, job_config)
        assert result == custom

    def test_append_mode(self, mock_project):
        custom = ["custom anchor"]
        job_config = {"anchor_text_config": {"mode": "append", "custom_text": custom}}
        with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get:
            mock_get.return_value = ["default1", "default2"]
            result = _get_anchor_texts_for_tier("tier1", mock_project, job_config)
            assert result == ["default1", "default2", "custom anchor"]

    def test_no_config(self, mock_project):
        job_config = None
        with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get:
            mock_get.return_value = ["default"]
            result = _get_anchor_texts_for_tier("tier1", mock_project, job_config)
            assert result == ["default"]


class TestTryInjectLink:
    """Tests for link injection attempts"""

    def test_inject_with_found_anchor(self):
        html = "<p>This is about shaft machining here.</p>"
        anchors = ["shaft machining", "other anchor"]
        url = "https://example.com"
        result, injected = _try_inject_link(html, anchors, url)
        assert injected
        assert f'<a href="{url}">' in result

    def test_inject_with_fallback(self):
        html = "<p>This is a paragraph about something else entirely.</p>"
        anchors = ["shaft machining"]
        url = "https://example.com"
        result, injected = _try_inject_link(html, anchors, url)
        assert injected
        assert f'<a href="{url}">' in result

    def test_no_anchors(self):
        html = "<p>Content</p>"
        anchors = []
        url = "https://example.com"
        result, injected = _try_inject_link(html, anchors, url)
        assert not injected
        assert result == html


class TestInjectSeeAlsoSection:
    """Tests for See Also section injection"""

    def test_inject_see_also_with_multiple_articles(self, mock_content, mock_link_repo):
        html = "<p>Article content here.</p>"
        article_urls = [
            {"content_id": 1, "title": "Article 1", "url": "https://example.com/article1.html"},
            {"content_id": 2, "title": "Article 2", "url": "https://example.com/article2.html"},
            {"content_id": 3, "title": "Article 3", "url": "https://example.com/article3.html"}
        ]
        mock_content.id = 1
        result = _inject_see_also_section(html, mock_content, article_urls, mock_link_repo)
        # NOTE(review): the original assertion(s) were lost after `assert "`;
        # the heading text is presumed from the function name -- confirm the
        # exact markup and add link-count assertions if appropriate.
        assert "See Also" in result

    def test_inject_see_also_with_single_article(self, mock_content, mock_link_repo):
        # NOTE(review): test name and `html` markup reconstructed.
        html = "<p>Content</p>"
        article_urls = [
            {"content_id": 1, "title": "Only Article", "url": "https://example.com/article.html"}
        ]
        mock_content.id = 1
        result = _inject_see_also_section(html, mock_content, article_urls, mock_link_repo)
        # No other articles, should return original HTML
        # NOTE(review): the right-hand side of `or` was lost; reconstructed.
        assert result == html or "See Also" not in result


class TestInjectHomepageLink:
    """Tests for homepage link injection"""

    # NOTE(review): class header and first test name reconstructed; the
    # expected homepage URLs follow the behavior pinned in
    # TestExtractHomepageUrl (scheme + host + "/").

    def test_inject_homepage_link(self, mock_content, mock_project, mock_link_repo):
        html = "<p>This is about content and going Home is great.</p>"
        article_url = "https://example.com/article.html"
        result = _inject_homepage_link(html, mock_content, article_url, mock_project, mock_link_repo)
        assert '<a href="https://example.com/">' in result
        assert 'Home' in result
        mock_link_repo.create.assert_called_once()
        call_args = mock_link_repo.create.call_args
        assert call_args[1]['link_type'] == 'homepage'

    def test_inject_homepage_link_not_found_in_content(self, mock_content, mock_project, mock_link_repo):
        html = "<p>This is about something totally different and unrelated content here.</p>"
        article_url = "https://www.example.com/article.html"
        result = _inject_homepage_link(html, mock_content, article_url, mock_project, mock_link_repo)
        # Should still inject via fallback (using "Home" anchor text)
        assert '<a href="https://www.example.com/">' in result
        assert 'Home' in result


class TestInjectTieredLinks:
    """Tests for tiered link injection"""

    def test_tier1_money_site_link(self, mock_content, mock_project, mock_link_repo):
        html = "<p>Learn about shaft machining processes.</p>"
        tiered_links = {"tier": 1, "money_site_url": "https://moneysite.com"}
        job_config = None
        with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get:
            mock_get.return_value = ["shaft machining", "machining"]
            result = _inject_tiered_links(html, mock_content, tiered_links, mock_project, job_config, mock_link_repo)
            assert '<a href="https://moneysite.com">' in result
            mock_link_repo.create.assert_called_once()
            call_args = mock_link_repo.create.call_args
            assert call_args[1]['link_type'] == 'tiered'
            assert call_args[1]['to_url'] == 'https://moneysite.com'

    def test_tier2_lower_tier_links(self, mock_content, mock_project, mock_link_repo):
        html = "<p>This article discusses shaft machining and CNC processes and precision work.</p>"
        mock_content.tier = "tier2"
        tiered_links = {
            "tier": 2,
            "lower_tier": 1,
            "lower_tier_urls": [
                "https://site1.com/article1.html",
                "https://site2.com/article2.html"
            ]
        }
        job_config = None
        with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get:
            mock_get.return_value = ["shaft machining", "CNC processes"]
            _inject_tiered_links(html, mock_content, tiered_links, mock_project, job_config, mock_link_repo)
            # Should create links for both URLs
            assert mock_link_repo.create.call_count == 2

    def test_tier1_no_money_site(self, mock_content, mock_project, mock_link_repo):
        html = "<p>Content</p>"
        tiered_links = {"tier": 1}
        job_config = None
        result = _inject_tiered_links(html, mock_content, tiered_links, mock_project, job_config, mock_link_repo)
        # Should return original HTML with warning
        assert result == html
        mock_link_repo.create.assert_not_called()


class TestInjectInterlinks:
    """Tests for main inject_interlinks function"""

    def test_empty_content_records(self, mock_project, mock_content_repo, mock_link_repo):
        inject_interlinks([], [], {}, mock_project, None, mock_content_repo, mock_link_repo)
        # Should not crash, just log warning
        mock_content_repo.update.assert_not_called()

    def test_successful_injection(self, mock_content, mock_project, mock_content_repo, mock_link_repo):
        article_urls = [
            {"content_id": 1, "title": "Article 1", "url": "https://example.com/article1.html"},
            {"content_id": 2, "title": "Article 2", "url": "https://example.com/article2.html"}
        ]
        tiered_links = {"tier": 1, "money_site_url": "https://moneysite.com"}
        job_config = None

        # Stub out the three injection stages so only the orchestration
        # (one repository update per processed article) is under test.
        with patch('src.interlinking.content_injection._inject_tiered_links') as mock_tiered, \
             patch('src.interlinking.content_injection._inject_homepage_link') as mock_home, \
             patch('src.interlinking.content_injection._inject_see_also_section') as mock_see_also:
            mock_tiered.return_value = "<p>Updated content</p>"
            mock_home.return_value = "<p>Updated content</p>"
            mock_see_also.return_value = "<p>Updated content</p>"

            inject_interlinks(
                [mock_content],
                article_urls,
                tiered_links,
                mock_project,
                job_config,
                mock_content_repo,
                mock_link_repo
            )

            mock_content_repo.update.assert_called_once()

    def test_missing_url_for_content(self, mock_content, mock_project, mock_content_repo, mock_link_repo):
        article_urls = [
            {"content_id": 2, "title": "Article 2", "url": "https://example.com/article2.html"}
        ]
        tiered_links = {"tier": 1, "money_site_url": "https://moneysite.com"}
        mock_content.id = 1  # ID not in article_urls

        inject_interlinks(
            [mock_content],
            article_urls,
            tiered_links,
            mock_project,
            None,
            mock_content_repo,
            mock_link_repo
        )

        # Should skip this content
        mock_content_repo.update.assert_not_called()