# Linkman-Paperclip-Wrap/tests/test_matching.py
#
# 98 lines
# 3.3 KiB
# Python

"""Tests for keyword normalization and fuzzy matching."""
from __future__ import annotations
from link_building_workflow.matching import (
filename_stem_to_keyword,
fuzzy_keyword_match,
normalize_for_match,
)
class TestNormalizeForMatch:
def test_lowercases(self):
assert normalize_for_match("Hello World") == "hello world"
def test_strips_punctuation(self):
assert normalize_for_match("hello, world!") == "hello world"
def test_collapses_whitespace(self):
assert normalize_for_match("hello world\n\ttest") == "hello world test"
def test_empty_string(self):
assert normalize_for_match("") == ""
def test_only_punctuation(self):
assert normalize_for_match("!!!...,,,") == ""
def test_numbers_preserved(self):
assert normalize_for_match("5-axis cnc") == "5 axis cnc"
def test_leading_trailing_whitespace(self):
assert normalize_for_match(" hello world ") == "hello world"
class TestFuzzyKeywordMatch:
def test_exact_match(self, llm_never):
assert fuzzy_keyword_match("cnc machining", "cnc machining", llm_never) is True
def test_different_no_llm(self):
assert fuzzy_keyword_match("cnc", "cnc machining") is False
def test_different_llm_says_no(self, llm_never):
assert fuzzy_keyword_match("cnc", "milling", llm_never) is False
def test_different_llm_says_yes(self, llm_always):
# LLM callable gets to decide when exact match fails
assert fuzzy_keyword_match("shaft", "shafts", llm_always) is True
def test_empty_a(self, llm_always):
assert fuzzy_keyword_match("", "cnc", llm_always) is False
def test_empty_b(self, llm_always):
assert fuzzy_keyword_match("cnc", "", llm_always) is False
def test_both_empty(self, llm_always):
# Even with llm_always, empty inputs short-circuit to False
assert fuzzy_keyword_match("", "", llm_always) is False
def test_no_llm_check_fast_path_hit(self):
# When no llm_check provided, exact matches still work
assert fuzzy_keyword_match("same", "same") is True
def test_no_llm_check_fast_path_miss(self):
# When no llm_check and not exact, returns False
assert fuzzy_keyword_match("same", "different") is False
def test_llm_check_only_called_when_needed(self):
calls = []
def tracking_llm(a, b):
calls.append((a, b))
return True
# Exact match: LLM should not be called
fuzzy_keyword_match("cnc", "cnc", tracking_llm)
assert calls == []
# Different: LLM should be called once
fuzzy_keyword_match("shaft", "shafts", tracking_llm)
assert calls == [("shaft", "shafts")]
class TestFilenameStemToKeyword:
def test_hyphens_to_spaces(self):
assert filename_stem_to_keyword("precision-cnc-machining") == "precision cnc machining"
def test_underscores_to_spaces(self):
assert filename_stem_to_keyword("precision_cnc_machining") == "precision cnc machining"
def test_mixed_separators(self):
assert filename_stem_to_keyword("precision-cnc_machining") == "precision cnc machining"
def test_uppercase(self):
assert filename_stem_to_keyword("CNC-Machining") == "cnc machining"
def test_empty(self):
assert filename_stem_to_keyword("") == ""