"""Tests for ``_fingerprints_for_text`` in ``pyucc.core.duplicates``.

Each test calls the helper twice on the same text, changing only one of the
``k`` / ``window`` arguments, and compares the two results.
"""
import pytest

from pyucc.core import duplicates as dupmod


def test_fingerprints_vary_with_k():
    """Fingerprints computed with ``k=3`` and ``k=15`` must not be equal."""
    sample = "def func():\n return 1\n\n# comment\nprint(func())\n"

    # A smaller k should yield more k-grams (and likely more fingerprints)
    # from the same text; the window is held at 4 for both calls.
    by_k = {
        k: dupmod._fingerprints_for_text(sample, k=k, window=4)
        for k in (3, 15)
    }

    for fingerprints in by_k.values():
        assert isinstance(fingerprints, set)

    # The two k values are expected to give different fingerprint sets.
    assert by_k[3] != by_k[15]


def test_fingerprints_vary_with_window():
    """``window=1`` must keep at least as many fingerprints as ``window=4``."""
    sample = "line1 line2 line3 line4 line5 line6 line7"

    # k is held at 5 for both calls; only the window size changes.
    by_window = {
        window: dupmod._fingerprints_for_text(sample, k=5, window=window)
        for window in (1, 4)
    }

    for fingerprints in by_window.values():
        assert isinstance(fingerprints, set)

    # window=1 should select many more fingerprints than a larger window;
    # the check is deliberately loose (>=) and only rules out the reverse.
    assert len(by_window[1]) >= len(by_window[4])