"""Tests for inbox folder scanning and keyword-based file lookup.""" from __future__ import annotations from pathlib import Path import pytest from link_building_workflow.inbox import ( find_all_xlsx_for_keyword, find_xlsx_for_keyword, list_inbox_xlsx, ) @pytest.fixture() def inbox(tmp_path: Path) -> Path: """An empty inbox folder.""" d = tmp_path / "cora-inbox" d.mkdir() return d def _touch(folder: Path, name: str) -> Path: p = folder / name p.write_bytes(b"fake xlsx") return p class TestListInboxXlsx: def test_missing_folder_returns_empty(self, tmp_path: Path): assert list_inbox_xlsx(tmp_path / "does-not-exist") == [] def test_empty_folder(self, inbox: Path): assert list_inbox_xlsx(inbox) == [] def test_lists_xlsx_only(self, inbox: Path): _touch(inbox, "a.xlsx") _touch(inbox, "readme.txt") _touch(inbox, "b.xlsx") result = list_inbox_xlsx(inbox) names = [p.name for p in result] assert names == ["a.xlsx", "b.xlsx"] def test_skips_office_lock_files(self, inbox: Path): _touch(inbox, "real.xlsx") _touch(inbox, "~$real.xlsx") result = list_inbox_xlsx(inbox) assert [p.name for p in result] == ["real.xlsx"] def test_skips_processed_by_default(self, inbox: Path): _touch(inbox, "new.xlsx") processed = inbox / "processed" processed.mkdir() _touch(processed, "old.xlsx") # Also duplicate the name in root to prove it gets filtered _touch(inbox, "old.xlsx") result = list_inbox_xlsx(inbox) assert [p.name for p in result] == ["new.xlsx"] def test_skip_processed_disabled(self, inbox: Path): _touch(inbox, "new.xlsx") processed = inbox / "processed" processed.mkdir() _touch(inbox, "old.xlsx") # same name as one we "processed" result = list_inbox_xlsx(inbox, skip_processed=False) assert sorted(p.name for p in result) == ["new.xlsx", "old.xlsx"] def test_sorted_output(self, inbox: Path): _touch(inbox, "c.xlsx") _touch(inbox, "a.xlsx") _touch(inbox, "b.xlsx") result = list_inbox_xlsx(inbox) assert [p.name for p in result] == ["a.xlsx", "b.xlsx", "c.xlsx"] class TestFindXlsxForKeyword: def test_exact_match(self, inbox: Path, llm_never): _touch(inbox, "precision-cnc-machining.xlsx") match = find_xlsx_for_keyword(inbox, "precision cnc machining", llm_never) assert match is not None assert match.filename == "precision-cnc-machining.xlsx" assert match.stem_keyword == "precision cnc machining" def test_no_match(self, inbox: Path, llm_never): _touch(inbox, "other-keyword.xlsx") match = find_xlsx_for_keyword(inbox, "cnc machining", llm_never) assert match is None def test_missing_folder(self, tmp_path: Path, llm_never): match = find_xlsx_for_keyword( tmp_path / "no-such-dir", "cnc machining", llm_never ) assert match is None def test_empty_keyword(self, inbox: Path, llm_never): _touch(inbox, "anything.xlsx") match = find_xlsx_for_keyword(inbox, "", llm_never) assert match is None def test_keyword_with_hyphens(self, inbox: Path, llm_never): # Caller may pass the keyword in hyphenated form; should still match _touch(inbox, "precision-cnc-machining.xlsx") match = find_xlsx_for_keyword( inbox, "precision-cnc-machining", llm_never ) assert match is not None def test_keyword_case_insensitive(self, inbox: Path, llm_never): _touch(inbox, "cnc-machining.xlsx") match = find_xlsx_for_keyword(inbox, "CNC Machining", llm_never) assert match is not None def test_plural_match_via_llm(self, inbox: Path): _touch(inbox, "cnc-shafts.xlsx") def only_plural_of_shaft(a: str, b: str) -> bool: return {a, b} == {"cnc shaft", "cnc shafts"} # Singular keyword should match the plural filename via LLM match = find_xlsx_for_keyword(inbox, "cnc shaft", only_plural_of_shaft) assert match is not None assert match.filename == "cnc-shafts.xlsx" def test_first_match_returned(self, inbox: Path, llm_never): # Two xlsx files both match; sorted order picks "a..." first _touch(inbox, "b-cnc-machining.xlsx") _touch(inbox, "a-cnc-machining.xlsx") # These don't fuzzy match the keyword "cnc machining" because of # the a-/b- prefix. So use a real collision: _touch(inbox, "cnc-machining.xlsx") match = find_xlsx_for_keyword(inbox, "cnc machining", llm_never) assert match is not None assert match.filename == "cnc-machining.xlsx" def test_processed_files_ignored(self, inbox: Path, llm_never): processed = inbox / "processed" processed.mkdir() _touch(processed, "cnc-machining.xlsx") _touch(inbox, "cnc-machining.xlsx") # Inbox file with same name as processed one is also skipped by # list_inbox_xlsx, so no match available match = find_xlsx_for_keyword(inbox, "cnc machining", llm_never) assert match is None class TestFindAllXlsxForKeyword: def test_returns_all_matches(self, inbox: Path): _touch(inbox, "cnc-shaft.xlsx") _touch(inbox, "cnc-shafts.xlsx") _touch(inbox, "unrelated.xlsx") def plural_ok(a: str, b: str) -> bool: return {a, b} == {"cnc shaft", "cnc shafts"} results = find_all_xlsx_for_keyword(inbox, "cnc shaft", plural_ok) names = sorted(r.filename for r in results) assert names == ["cnc-shaft.xlsx", "cnc-shafts.xlsx"] def test_empty_when_no_matches(self, inbox: Path, llm_never): _touch(inbox, "unrelated.xlsx") results = find_all_xlsx_for_keyword(inbox, "cnc shaft", llm_never) assert results == [] def test_empty_keyword_returns_empty(self, inbox: Path, llm_never): _touch(inbox, "anything.xlsx") results = find_all_xlsx_for_keyword(inbox, "", llm_never) assert results == []