25 lines
943 B
Python
25 lines
943 B
Python
import pytest
|
|
|
|
from pyucc.core import duplicates as dupmod
|
|
|
|
|
|
def test_fingerprints_vary_with_k():
    """Check that changing ``k`` changes the fingerprint set for a fixed text.

    Calls ``dupmod._fingerprints_for_text`` twice on the same snippet with the
    same ``window`` (4) and two different ``k`` values (3 and 15), then asserts
    that both results are ``set`` instances and that the two sets differ.
    """
    # NOTE(review): the single space before "return" may be whitespace that was
    # collapsed from a deeper indent in the original file -- confirm.
    text = "def func():\n return 1\n\n# comment\nprint(func())\n"

    # Small k should produce more k-grams (and likely more fingerprints).
    fps_small = dupmod._fingerprints_for_text(text, k=3, window=4)
    fps_large = dupmod._fingerprints_for_text(text, k=15, window=4)

    assert isinstance(fps_small, set)
    assert isinstance(fps_large, set)

    # Only inequality is checked here; the relative sizes of the two sets are
    # deliberately not asserted.
    assert fps_small != fps_large
|
|
|
|
|
|
def test_fingerprints_vary_with_window():
    """Check that a window of 1 yields at least as many fingerprints as 4.

    Calls ``dupmod._fingerprints_for_text`` twice on the same text with the
    same ``k`` (5) and two different ``window`` values (1 and 4), then asserts
    that both results are ``set`` instances and that the ``window=1`` set is
    not smaller than the ``window=4`` set.
    """
    text = "line1 line2 line3 line4 line5 line6 line7"

    fps_w1 = dupmod._fingerprints_for_text(text, k=5, window=1)
    fps_w4 = dupmod._fingerprints_for_text(text, k=5, window=4)

    assert isinstance(fps_w1, set)
    assert isinstance(fps_w4, set)

    # window=1 should select at least as many fingerprints as a larger window;
    # the comparison is intentionally non-strict, so equal sizes also pass.
    assert len(fps_w1) >= len(fps_w4)
|