""" Unit tests for content injection module """ import pytest from unittest.mock import Mock, MagicMock, patch from src.interlinking.content_injection import ( inject_interlinks, _inject_tiered_links, _inject_homepage_link, _inject_see_also_section, _get_anchor_texts_for_tier, _try_inject_link, _find_and_wrap_anchor_text, _insert_link_into_random_paragraph, _extract_homepage_url, _insert_before_closing_tags ) from src.database.models import GeneratedContent, Project @pytest.fixture def mock_project(): """Create a mock Project""" project = Mock(spec=Project) project.id = 1 project.main_keyword = "shaft machining" project.related_searches = ["cnc shaft machining", "precision shaft machining"] project.entities = ["lathe", "milling", "CNC"] return project @pytest.fixture def mock_content(): """Create a mock GeneratedContent""" content = Mock(spec=GeneratedContent) content.id = 1 content.project_id = 1 content.tier = "tier1" content.title = "Guide to Shaft Machining" content.content = "

Shaft machining is an important process. Learn about shaft machining here.

" return content @pytest.fixture def mock_content_repo(): """Create a mock GeneratedContentRepository""" repo = Mock() repo.update = Mock(return_value=None) return repo @pytest.fixture def mock_link_repo(): """Create a mock ArticleLinkRepository""" repo = Mock() repo.create = Mock(return_value=None) return repo class TestExtractHomepageUrl: """Tests for homepage URL extraction""" def test_extract_from_https_url(self): url = "https://example.com/article-slug.html" result = _extract_homepage_url(url) assert result == "https://example.com/" def test_extract_from_http_url(self): url = "http://example.com/article.html" result = _extract_homepage_url(url) assert result == "http://example.com/" def test_extract_from_cdn_url(self): url = "https://site.b-cdn.net/my-article.html" result = _extract_homepage_url(url) assert result == "https://site.b-cdn.net/" def test_extract_from_custom_domain(self): url = "https://www.custom.com/path/to/article.html" result = _extract_homepage_url(url) assert result == "https://www.custom.com/" def test_extract_with_port(self): url = "https://example.com:8080/article.html" result = _extract_homepage_url(url) assert result == "https://example.com:8080/" class TestInsertBeforeClosingTags: """Tests for inserting content before closing tags""" def test_insert_after_last_paragraph(self): html = "

First paragraph

Last paragraph

" content = "

New Section

" result = _insert_before_closing_tags(html, content) assert "

New Section

" in result assert result.index("Last paragraph") < result.index("

New Section

") def test_insert_with_body_tag(self): html = "

Content

" content = "

See Also

" result = _insert_before_closing_tags(html, content) assert "

See Also

" in result def test_insert_with_no_paragraphs(self): html = "
Some content
" content = "

Section

" result = _insert_before_closing_tags(html, content) assert "

Section

" in result class TestFindAndWrapAnchorText: """Tests for finding and wrapping anchor text""" def test_find_exact_match(self): html = "

This is about shaft machining processes.

" anchor = "shaft machining" url = "https://example.com" result, found = _find_and_wrap_anchor_text(html, anchor, url) assert found assert f'' in result assert "shaft machining" in result def test_case_insensitive_match(self): html = "

This is about Shaft Machining processes.

" anchor = "shaft machining" url = "https://example.com" result, found = _find_and_wrap_anchor_text(html, anchor, url) assert found assert f'' in result def test_match_within_phrase(self): html = "

The shaft machining process is complex.

" anchor = "shaft machining" url = "https://example.com" result, found = _find_and_wrap_anchor_text(html, anchor, url) assert found assert f'
' in result def test_no_match(self): html = "

This is about something else.

" anchor = "shaft machining" url = "https://example.com" result, found = _find_and_wrap_anchor_text(html, anchor, url) assert not found assert result == html def test_skip_existing_links(self): html = '

Read about shaft machining here. Also shaft machining is important.

' anchor = "shaft machining" url = "https://example.com" result, found = _find_and_wrap_anchor_text(html, anchor, url) assert found # Should link the second occurrence, not the one already linked assert result.count(f'') == 1 class TestInsertLinkIntoRandomParagraph: """Tests for inserting link into random paragraph""" def test_insert_into_paragraph(self): html = "

This is a long paragraph with many words and sentences. It has enough content.

" anchor = "shaft machining" url = "https://example.com" result = _insert_link_into_random_paragraph(html, anchor, url) assert f'
{anchor}' in result def test_insert_with_multiple_paragraphs(self): html = "

First paragraph.

Second paragraph with more text.

Third paragraph.

" anchor = "test link" url = "https://example.com" result = _insert_link_into_random_paragraph(html, anchor, url) assert f'{anchor}' in result def test_no_valid_paragraphs(self): html = "

Hi

Ok

" anchor = "test" url = "https://example.com" result = _insert_link_into_random_paragraph(html, anchor, url) # Should return original HTML if no valid paragraphs assert result == html or f'' in result class TestGetAnchorTextsForTier: """Tests for anchor text generation with job config overrides""" def test_default_mode(self, mock_project): job_config = {"anchor_text_config": {"mode": "default"}} with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get: mock_get.return_value = ["anchor1", "anchor2"] result = _get_anchor_texts_for_tier("tier1", mock_project, job_config) assert result == ["anchor1", "anchor2"] def test_override_mode(self, mock_project): custom = ["custom anchor 1", "custom anchor 2"] job_config = {"anchor_text_config": {"mode": "override", "custom_text": custom}} result = _get_anchor_texts_for_tier("tier1", mock_project, job_config) assert result == custom def test_append_mode(self, mock_project): custom = ["custom anchor"] job_config = {"anchor_text_config": {"mode": "append", "custom_text": custom}} with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get: mock_get.return_value = ["default1", "default2"] result = _get_anchor_texts_for_tier("tier1", mock_project, job_config) assert result == ["default1", "default2", "custom anchor"] def test_no_config(self, mock_project): job_config = None with patch('src.interlinking.content_injection.get_anchor_text_for_tier') as mock_get: mock_get.return_value = ["default"] result = _get_anchor_texts_for_tier("tier1", mock_project, job_config) assert result == ["default"] class TestTryInjectLink: """Tests for link injection attempts""" def test_inject_with_found_anchor(self): html = "

This is about shaft machining here.

" anchors = ["shaft machining", "other anchor"] url = "https://example.com" result, injected = _try_inject_link(html, anchors, url) assert injected assert f'
' in result def test_inject_with_fallback(self): html = "

This is a paragraph about something else entirely.

" anchors = ["shaft machining"] url = "https://example.com" result, injected = _try_inject_link(html, anchors, url) assert injected assert f'
' in result def test_no_anchors(self): html = "

Content

" anchors = [] url = "https://example.com" result, injected = _try_inject_link(html, anchors, url) assert not injected assert result == html class TestInjectSeeAlsoSection: """Tests for See Also section injection""" def test_inject_see_also_with_multiple_articles(self, mock_content, mock_link_repo): html = "

Article content here.

" article_urls = [ {"content_id": 1, "title": "Article 1", "url": "https://example.com/article1.html"}, {"content_id": 2, "title": "Article 2", "url": "https://example.com/article2.html"}, {"content_id": 3, "title": "Article 3", "url": "https://example.com/article3.html"} ] mock_content.id = 1 result = _inject_see_also_section(html, mock_content, article_urls, mock_link_repo) assert "

See Also

" in result assert "