""" Integration tests for CORA ingestion workflow """ import pytest from pathlib import Path from click.testing import CliRunner from src.cli.commands import app from src.database.repositories import UserRepository, ProjectRepository from src.database.models import User, Project from src.auth.service import AuthService from src.ingestion.parser import CORAParser, CORAParseError class TestCORAParserIntegration: """Integration tests for CORA parser with real Excel files""" def test_parse_sample_cora_file(self): """Test parsing the actual sample CORA file""" sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx") if not sample_file.exists(): pytest.skip("Sample CORA file not found") parser = CORAParser(str(sample_file)) data = parser.parse() assert data["main_keyword"] == "shaft machining" assert data["word_count"] is not None assert data["term_frequency"] is not None assert isinstance(data["entities"], list) assert isinstance(data["related_searches"], list) assert data["custom_anchor_text"] == [] def test_parse_sample_file_with_custom_anchors(self): """Test parsing with custom anchor text""" sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx") if not sample_file.exists(): pytest.skip("Sample CORA file not found") custom_anchors = ["custom anchor 1", "custom anchor 2"] parser = CORAParser(str(sample_file)) data = parser.parse(custom_anchor_text=custom_anchors) assert data["custom_anchor_text"] == custom_anchors def test_parse_nonexistent_file_raises_error(self): """Test error raised for nonexistent file""" with pytest.raises(CORAParseError, match="File not found"): CORAParser("nonexistent_file.xlsx") class TestProjectRepositoryIntegration: """Integration tests for Project repository with real database""" def test_create_project(self, db_session): """Test creating a project in database""" user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password", "User") project_repo = ProjectRepository(db_session) project_data = { "main_keyword": "test keyword", "word_count": 1500, "term_frequency": 3, "related_search_density": 0.15, "entity_density": 0.10, "lsi_density": 0.05, "spintax_related_search_terms": "{term1|term2|term3}", "title_exact_match": 1, "title_related_search": 2, "meta_exact_match": 1, "meta_related_search": 1, "meta_entities": 2, "h1_exact": 1, "h1_related_search": 0, "h1_entities": 1, "h1_lsi": 0, "h2_total": 5, "h2_exact": 2, "h2_related_search": 2, "h2_entities": 1, "h2_lsi": 0, "h3_total": 8, "h3_exact": 3, "h3_related_search": 3, "h3_entities": 2, "h3_lsi": 0, "entities": ["entity1", "entity2", "entity3"], "related_searches": ["search1", "search2"], "custom_anchor_text": [] } project = project_repo.create(user.id, "Test Project", project_data) assert project.id is not None assert project.name == "Test Project" assert project.main_keyword == "test keyword" assert project.user_id == user.id assert project.word_count == 1500 assert project.term_frequency == 2 assert len(project.entities) == 3 assert len(project.related_searches) == 2 def test_get_projects_by_user(self, db_session): """Test getting projects for specific user""" user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user1 = auth_service.create_user_with_hashed_password("user1", "password", "User") user2 = auth_service.create_user_with_hashed_password("user2", "password", "User") project_repo = ProjectRepository(db_session) project1 = project_repo.create(user1.id, "Project 1", {"main_keyword": "keyword1"}) project2 = project_repo.create(user1.id, "Project 2", {"main_keyword": "keyword2"}) project3 = project_repo.create(user2.id, "Project 3", {"main_keyword": "keyword3"}) user1_projects = project_repo.get_by_user_id(user1.id) user2_projects = project_repo.get_by_user_id(user2.id) assert len(user1_projects) == 2 assert len(user2_projects) == 1 assert project1 in user1_projects assert project2 in user1_projects assert project3 in user2_projects def test_get_all_projects(self, db_session): """Test getting all projects""" user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password", "User") project_repo = ProjectRepository(db_session) project1 = project_repo.create(user.id, "Project 1", {"main_keyword": "keyword1"}) project2 = project_repo.create(user.id, "Project 2", {"main_keyword": "keyword2"}) all_projects = project_repo.get_all() assert len(all_projects) >= 2 assert project1 in all_projects assert project2 in all_projects def test_delete_project(self, db_session): """Test deleting a project""" user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password", "User") project_repo = ProjectRepository(db_session) project = project_repo.create(user.id, "Test Project", {"main_keyword": "test"}) project_id = project.id deleted = project_repo.delete(project_id) assert deleted is True retrieved = project_repo.get_by_id(project_id) assert retrieved is None class TestCoraIngestionCLIIntegration: """Integration tests for CORA ingestion CLI command with real database""" def test_ingest_cora_cli_command(self, db_session): """Test full CORA ingestion workflow via CLI""" runner = CliRunner() user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password123", "User") sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx") if not sample_file.exists(): pytest.skip("Sample CORA file not found") result = runner.invoke(app, [ 'ingest-cora', '--file', str(sample_file), '--name', 'Shaft Machining Project', '--username', 'testuser', '--password', 'password123' ]) if result.exit_code != 0: print(f"\nCLI Output:\n{result.output}") print(f"\nException: {result.exception}") assert result.exit_code == 0 assert "Success: Project 'Shaft Machining Project' created" in result.output assert "shaft machining" in result.output.lower() project_repo = ProjectRepository(db_session) projects = project_repo.get_by_user_id(user.id) assert len(projects) >= 1 created_project = projects[-1] assert created_project.name == 'Shaft Machining Project' assert created_project.main_keyword == 'shaft machining' def test_ingest_cora_with_custom_anchors_cli(self, db_session): """Test CORA ingestion with custom anchor text via CLI""" runner = CliRunner() user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password123", "User") sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx") if not sample_file.exists(): pytest.skip("Sample CORA file not found") result = runner.invoke(app, [ 'ingest-cora', '--file', str(sample_file), '--name', 'Test Project', '--custom-anchors', 'anchor1,anchor2,anchor3', '--username', 'testuser', '--password', 'password123' ]) assert result.exit_code == 0 assert "Custom Anchor Text: anchor1, anchor2, anchor3" in result.output project_repo = ProjectRepository(db_session) projects = project_repo.get_by_user_id(user.id) created_project = projects[-1] assert created_project.custom_anchor_text == ["anchor1", "anchor2", "anchor3"] def test_ingest_cora_authentication_required(self): """Test CORA ingestion requires authentication""" runner = CliRunner() sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx") if not sample_file.exists(): pytest.skip("Sample CORA file not found") result = runner.invoke(app, [ 'ingest-cora', '--file', str(sample_file), '--name', 'Test Project', '--username', 'nonexistent', '--password', 'wrongpassword' ]) assert result.exit_code != 0 assert "Authentication failed" in result.output class TestListProjectsCLIIntegration: """Integration tests for list-projects CLI command with real database""" def test_list_projects_user_view(self, db_session): """Test listing projects for regular user""" runner = CliRunner() user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password123", "User") project_repo = ProjectRepository(db_session) project1 = project_repo.create(user.id, "Project 1", {"main_keyword": "keyword1"}) project2 = project_repo.create(user.id, "Project 2", {"main_keyword": "keyword2"}) result = runner.invoke(app, [ 'list-projects', '--username', 'testuser', '--password', 'password123' ]) assert result.exit_code == 0 assert "Your Projects:" in result.output assert "Project 1" in result.output assert "Project 2" in result.output assert "keyword1" in result.output assert "keyword2" in result.output def test_list_projects_admin_view(self, db_session): """Test listing all projects for admin""" runner = CliRunner() user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) admin = auth_service.create_user_with_hashed_password("admin", "password123", "Admin") user = auth_service.create_user_with_hashed_password("testuser", "password123", "User") project_repo = ProjectRepository(db_session) project1 = project_repo.create(user.id, "User Project", {"main_keyword": "keyword1"}) project2 = project_repo.create(admin.id, "Admin Project", {"main_keyword": "keyword2"}) result = runner.invoke(app, [ 'list-projects', '--username', 'admin', '--password', 'password123' ]) assert result.exit_code == 0 assert "All Projects (Admin View):" in result.output assert "User Project" in result.output assert "Admin Project" in result.output def test_list_projects_empty(self, db_session): """Test listing projects when user has none""" runner = CliRunner() user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("testuser", "password123", "User") result = runner.invoke(app, [ 'list-projects', '--username', 'testuser', '--password', 'password123' ]) assert result.exit_code == 0 assert "No projects found" in result.output class TestFullCORAWorkflow: """End-to-end workflow tests""" def test_complete_cora_ingestion_workflow(self, db_session): """Test complete workflow: create user, ingest CORA, list projects""" runner = CliRunner() user_repo = UserRepository(db_session) auth_service = AuthService(user_repo) user = auth_service.create_user_with_hashed_password("workflowuser", "password123", "User") sample_file = Path("shaft_machining_goog_251011_C_US_L_EN_M3P1A_GMW.xlsx") if not sample_file.exists(): pytest.skip("Sample CORA file not found") ingest_result = runner.invoke(app, [ 'ingest-cora', '--file', str(sample_file), '--name', 'Workflow Test Project', '--username', 'workflowuser', '--password', 'password123' ]) assert ingest_result.exit_code == 0 assert "Success" in ingest_result.output list_result = runner.invoke(app, [ 'list-projects', '--username', 'workflowuser', '--password', 'password123' ]) assert list_result.exit_code == 0 assert "Workflow Test Project" in list_result.output assert "shaft machining" in list_result.output project_repo = ProjectRepository(db_session) projects = project_repo.get_by_user_id(user.id) assert len(projects) >= 1 project = projects[-1] assert project.main_keyword == "shaft machining" assert project.word_count is not None assert project.entities is not None assert len(project.entities) > 0