"""Tests for keyword normalization and fuzzy matching.""" from __future__ import annotations from link_building_workflow.matching import ( filename_stem_to_keyword, fuzzy_keyword_match, normalize_for_match, ) class TestNormalizeForMatch: def test_lowercases(self): assert normalize_for_match("Hello World") == "hello world" def test_strips_punctuation(self): assert normalize_for_match("hello, world!") == "hello world" def test_collapses_whitespace(self): assert normalize_for_match("hello world\n\ttest") == "hello world test" def test_empty_string(self): assert normalize_for_match("") == "" def test_only_punctuation(self): assert normalize_for_match("!!!...,,,") == "" def test_numbers_preserved(self): assert normalize_for_match("5-axis cnc") == "5 axis cnc" def test_leading_trailing_whitespace(self): assert normalize_for_match(" hello world ") == "hello world" class TestFuzzyKeywordMatch: def test_exact_match(self, llm_never): assert fuzzy_keyword_match("cnc machining", "cnc machining", llm_never) is True def test_different_no_llm(self): assert fuzzy_keyword_match("cnc", "cnc machining") is False def test_different_llm_says_no(self, llm_never): assert fuzzy_keyword_match("cnc", "milling", llm_never) is False def test_different_llm_says_yes(self, llm_always): # LLM callable gets to decide when exact match fails assert fuzzy_keyword_match("shaft", "shafts", llm_always) is True def test_empty_a(self, llm_always): assert fuzzy_keyword_match("", "cnc", llm_always) is False def test_empty_b(self, llm_always): assert fuzzy_keyword_match("cnc", "", llm_always) is False def test_both_empty(self, llm_always): # Even with llm_always, empty inputs short-circuit to False assert fuzzy_keyword_match("", "", llm_always) is False def test_no_llm_check_fast_path_hit(self): # When no llm_check provided, exact matches still work assert fuzzy_keyword_match("same", "same") is True def test_no_llm_check_fast_path_miss(self): # When no llm_check and not exact, returns False assert fuzzy_keyword_match("same", "different") is False def test_llm_check_only_called_when_needed(self): calls = [] def tracking_llm(a, b): calls.append((a, b)) return True # Exact match: LLM should not be called fuzzy_keyword_match("cnc", "cnc", tracking_llm) assert calls == [] # Different: LLM should be called once fuzzy_keyword_match("shaft", "shafts", tracking_llm) assert calls == [("shaft", "shafts")] class TestFilenameStemToKeyword: def test_hyphens_to_spaces(self): assert filename_stem_to_keyword("precision-cnc-machining") == "precision cnc machining" def test_underscores_to_spaces(self): assert filename_stem_to_keyword("precision_cnc_machining") == "precision cnc machining" def test_mixed_separators(self): assert filename_stem_to_keyword("precision-cnc_machining") == "precision cnc machining" def test_uppercase(self): assert filename_stem_to_keyword("CNC-Machining") == "cnc machining" def test_empty(self): assert filename_stem_to_keyword("") == ""