"""
Integration tests for CORA ingestion workflow
"""
|
|
|
|
import pytest
|
|
from pathlib import Path
|
|
from click.testing import CliRunner
|
|
from src.cli.commands import app
|
|
from src.database.repositories import UserRepository, ProjectRepository
|
|
from src.database.models import User, Project
|
|
from src.auth.service import AuthService
|
|
from src.ingestion.parser import CORAParser, CORAParseError
|
|
|
|
|
|
class TestCORAParserIntegration:
    """Run CORAParser against real Excel workbooks on disk."""

    @staticmethod
    def _sample_workbook():
        """Return the sample workbook path, skipping the test if it is missing."""
        workbook = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx")
        if not workbook.exists():
            pytest.skip("Sample CORA file not found")
        return workbook

    def test_parse_sample_cora_file(self):
        """Parse the sample workbook and check the fields in the result."""
        workbook = self._sample_workbook()

        parsed = CORAParser(str(workbook)).parse()

        assert parsed["main_keyword"] == "shaft machining"
        for required_key in ("word_count", "term_frequency"):
            assert parsed[required_key] is not None
        for list_key in ("entities", "related_searches"):
            assert isinstance(parsed[list_key], list)
        # No anchors were passed to parse(), so the list is expected empty.
        assert parsed["custom_anchor_text"] == []

    def test_parse_sample_file_with_custom_anchors(self):
        """Anchors handed to parse() come back under "custom_anchor_text"."""
        workbook = self._sample_workbook()

        anchors = ["custom anchor 1", "custom anchor 2"]
        parsed = CORAParser(str(workbook)).parse(custom_anchor_text=anchors)

        assert parsed["custom_anchor_text"] == anchors

    def test_parse_nonexistent_file_raises_error(self):
        """Building a parser for a missing path raises CORAParseError."""
        missing_path = "nonexistent_file.xlsx"
        with pytest.raises(CORAParseError, match="File not found"):
            CORAParser(missing_path)
|
|
|
|
|
|
class TestProjectRepositoryIntegration:
    """Exercise ProjectRepository through the ``db_session`` fixture."""

    def test_create_project(self, db_session):
        """Create a fully populated project and read its attributes back."""
        auth = AuthService(UserRepository(db_session))
        owner = auth.create_user_with_hashed_password("testuser", "password", "User")

        repo = ProjectRepository(db_session)

        payload = {
            # Core metrics
            "main_keyword": "test keyword",
            "word_count": 1500,
            "term_frequency": 3,
            # Densities
            "related_search_density": 0.15,
            "entity_density": 0.10,
            "lsi_density": 0.05,
            "spintax_related_search_terms": "{term1|term2|term3}",
            # Title / meta counts
            "title_exact_match": 1,
            "title_related_search": 2,
            "meta_exact_match": 1,
            "meta_related_search": 1,
            "meta_entities": 2,
            # H1 counts
            "h1_exact": 1,
            "h1_related_search": 0,
            "h1_entities": 1,
            "h1_lsi": 0,
            # H2 counts
            "h2_total": 5,
            "h2_exact": 2,
            "h2_related_search": 2,
            "h2_entities": 1,
            "h2_lsi": 0,
            # H3 counts
            "h3_total": 8,
            "h3_exact": 3,
            "h3_related_search": 3,
            "h3_entities": 2,
            "h3_lsi": 0,
            # List-valued fields
            "entities": ["entity1", "entity2", "entity3"],
            "related_searches": ["search1", "search2"],
            "custom_anchor_text": [],
        }

        created = repo.create(owner.id, "Test Project", payload)

        assert created.id is not None
        assert created.name == "Test Project"
        assert created.main_keyword == "test keyword"
        assert created.user_id == owner.id
        assert created.word_count == 1500
        assert created.term_frequency == 3
        assert len(created.entities) == 3
        assert len(created.related_searches) == 2

    def test_get_projects_by_user(self, db_session):
        """get_by_user_id returns only the projects created for that user."""
        auth = AuthService(UserRepository(db_session))
        first_owner = auth.create_user_with_hashed_password("user1", "password", "User")
        second_owner = auth.create_user_with_hashed_password("user2", "password", "User")

        repo = ProjectRepository(db_session)

        alpha = repo.create(first_owner.id, "Project 1", {"main_keyword": "keyword1"})
        beta = repo.create(first_owner.id, "Project 2", {"main_keyword": "keyword2"})
        gamma = repo.create(second_owner.id, "Project 3", {"main_keyword": "keyword3"})

        first_owner_projects = repo.get_by_user_id(first_owner.id)
        second_owner_projects = repo.get_by_user_id(second_owner.id)

        assert len(first_owner_projects) == 2
        assert len(second_owner_projects) == 1
        for expected in (alpha, beta):
            assert expected in first_owner_projects
        assert gamma in second_owner_projects

    def test_get_all_projects(self, db_session):
        """get_all includes every project created in this test."""
        auth = AuthService(UserRepository(db_session))
        owner = auth.create_user_with_hashed_password("testuser", "password", "User")

        repo = ProjectRepository(db_session)

        alpha = repo.create(owner.id, "Project 1", {"main_keyword": "keyword1"})
        beta = repo.create(owner.id, "Project 2", {"main_keyword": "keyword2"})

        everything = repo.get_all()

        # ">=" rather than "==": the check tolerates rows this test did not create.
        assert len(everything) >= 2
        for expected in (alpha, beta):
            assert expected in everything

    def test_delete_project(self, db_session):
        """delete returns True and the project can no longer be fetched by id."""
        auth = AuthService(UserRepository(db_session))
        owner = auth.create_user_with_hashed_password("testuser", "password", "User")

        repo = ProjectRepository(db_session)

        doomed = repo.create(owner.id, "Test Project", {"main_keyword": "test"})
        doomed_id = doomed.id

        assert repo.delete(doomed_id) is True

        assert repo.get_by_id(doomed_id) is None
|
|
|
|
|
|
class TestCoraIngestionCLIIntegration:
    """Integration tests for CORA ingestion CLI command with real database"""

    @staticmethod
    def _cli_failure_details(result):
        """Format the CLI output and exception for use as an assertion message."""
        return f"\nCLI Output:\n{result.output}\nException: {result.exception}"

    def test_ingest_cora_cli_command(self, db_session):
        """Test full CORA ingestion workflow via CLI"""
        sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx")

        # Skip before touching the database so a missing fixture file causes
        # no writes (same order as test_ingest_cora_authentication_required).
        if not sample_file.exists():
            pytest.skip("Sample CORA file not found")

        runner = CliRunner()

        user_repo = UserRepository(db_session)
        auth_service = AuthService(user_repo)
        user = auth_service.create_user_with_hashed_password("testuser", "password123", "User")

        result = runner.invoke(app, [
            'ingest-cora',
            '--file', str(sample_file),
            '--name', 'Shaft Machining Project',
            '--username', 'testuser',
            '--password', 'password123',
        ])

        # Carry the CLI output in the assertion message so a failure is
        # self-explanatory without a separate print.
        assert result.exit_code == 0, self._cli_failure_details(result)
        assert "Success: Project 'Shaft Machining Project' created" in result.output
        assert "shaft machining" in result.output.lower()

        project_repo = ProjectRepository(db_session)
        projects = project_repo.get_by_user_id(user.id)

        assert len(projects) >= 1
        # Takes the last returned project as the one just created; this relies
        # on the repository's ordering - TODO confirm.
        created_project = projects[-1]
        assert created_project.name == 'Shaft Machining Project'
        assert created_project.main_keyword == 'shaft machining'

    def test_ingest_cora_with_custom_anchors_cli(self, db_session):
        """Test CORA ingestion with custom anchor text via CLI"""
        sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx")

        # Skip before any database setup, as in the other tests of this class.
        if not sample_file.exists():
            pytest.skip("Sample CORA file not found")

        runner = CliRunner()

        user_repo = UserRepository(db_session)
        auth_service = AuthService(user_repo)
        user = auth_service.create_user_with_hashed_password("testuser", "password123", "User")

        result = runner.invoke(app, [
            'ingest-cora',
            '--file', str(sample_file),
            '--name', 'Test Project',
            '--custom-anchors', 'anchor1,anchor2,anchor3',
            '--username', 'testuser',
            '--password', 'password123',
        ])

        assert result.exit_code == 0, self._cli_failure_details(result)
        assert "Custom Anchor Text: anchor1, anchor2, anchor3" in result.output

        project_repo = ProjectRepository(db_session)
        projects = project_repo.get_by_user_id(user.id)

        # Guard the [-1] lookup so an empty result fails as an assertion
        # rather than an IndexError.
        assert len(projects) >= 1
        created_project = projects[-1]
        assert created_project.custom_anchor_text == ["anchor1", "anchor2", "anchor3"]

    def test_ingest_cora_authentication_required(self):
        """Test CORA ingestion requires authentication"""
        runner = CliRunner()

        sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx")

        if not sample_file.exists():
            pytest.skip("Sample CORA file not found")

        # This test creates no user itself, so the credentials below are
        # expected to be rejected.
        result = runner.invoke(app, [
            'ingest-cora',
            '--file', str(sample_file),
            '--name', 'Test Project',
            '--username', 'nonexistent',
            '--password', 'wrongpassword',
        ])

        assert result.exit_code != 0
        assert "Authentication failed" in result.output
|
|
|
|
|
|
class TestListProjectsCLIIntegration:
    """Integration tests for list-projects CLI command with real database"""

    def test_list_projects_user_view(self, db_session):
        """Test listing projects for regular user"""
        runner = CliRunner()

        user_repo = UserRepository(db_session)
        auth_service = AuthService(user_repo)
        user = auth_service.create_user_with_hashed_password("testuser", "password123", "User")

        project_repo = ProjectRepository(db_session)
        # The returned objects are not needed; the test only checks what the
        # CLI prints for these projects.
        project_repo.create(user.id, "Project 1", {"main_keyword": "keyword1"})
        project_repo.create(user.id, "Project 2", {"main_keyword": "keyword2"})

        result = runner.invoke(app, [
            'list-projects',
            '--username', 'testuser',
            '--password', 'password123',
        ])

        assert result.exit_code == 0
        assert "Your Projects:" in result.output
        assert "Project 1" in result.output
        assert "Project 2" in result.output
        assert "keyword1" in result.output
        assert "keyword2" in result.output

    def test_list_projects_admin_view(self, db_session):
        """Test listing all projects for admin"""
        runner = CliRunner()

        user_repo = UserRepository(db_session)
        auth_service = AuthService(user_repo)
        admin = auth_service.create_user_with_hashed_password("admin", "password123", "Admin")
        user = auth_service.create_user_with_hashed_password("testuser", "password123", "User")

        project_repo = ProjectRepository(db_session)
        # One project per account, so the output must include a project the
        # admin does not own.
        project_repo.create(user.id, "User Project", {"main_keyword": "keyword1"})
        project_repo.create(admin.id, "Admin Project", {"main_keyword": "keyword2"})

        result = runner.invoke(app, [
            'list-projects',
            '--username', 'admin',
            '--password', 'password123',
        ])

        assert result.exit_code == 0
        assert "All Projects (Admin View):" in result.output
        assert "User Project" in result.output
        assert "Admin Project" in result.output

    def test_list_projects_empty(self, db_session):
        """Test listing projects when user has none"""
        runner = CliRunner()

        user_repo = UserRepository(db_session)
        auth_service = AuthService(user_repo)
        # The account only has to exist for the login below; its object is unused.
        auth_service.create_user_with_hashed_password("testuser", "password123", "User")

        result = runner.invoke(app, [
            'list-projects',
            '--username', 'testuser',
            '--password', 'password123',
        ])

        assert result.exit_code == 0
        assert "No projects found" in result.output
|
|
|
|
|
|
class TestFullCORAWorkflow:
    """End-to-end workflow tests"""

    def test_complete_cora_ingestion_workflow(self, db_session):
        """Test complete workflow: create user, ingest CORA, list projects"""
        sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx")

        # Skip before creating the user so a missing sample file causes no
        # database writes.
        if not sample_file.exists():
            pytest.skip("Sample CORA file not found")

        runner = CliRunner()

        user_repo = UserRepository(db_session)
        auth_service = AuthService(user_repo)
        user = auth_service.create_user_with_hashed_password("workflowuser", "password123", "User")

        # Step 1: ingest the workbook through the CLI.
        ingest_result = runner.invoke(app, [
            'ingest-cora',
            '--file', str(sample_file),
            '--name', 'Workflow Test Project',
            '--username', 'workflowuser',
            '--password', 'password123',
        ])

        assert ingest_result.exit_code == 0
        assert "Success" in ingest_result.output

        # Step 2: the new project must appear in the same user's listing.
        list_result = runner.invoke(app, [
            'list-projects',
            '--username', 'workflowuser',
            '--password', 'password123',
        ])

        assert list_result.exit_code == 0
        assert "Workflow Test Project" in list_result.output
        assert "shaft machining" in list_result.output

        # Step 3: check the stored project directly through the repository.
        project_repo = ProjectRepository(db_session)
        projects = project_repo.get_by_user_id(user.id)

        assert len(projects) >= 1
        project = projects[-1]
        assert project.main_keyword == "shaft machining"
        assert project.word_count is not None
        assert project.entities is not None
        assert len(project.entities) > 0
|
|
|