"""Deep test of pygount determinism: run it repeatedly and check the EXACT output."""

import hashlib
import json
import subprocess
import tempfile
import time
from collections import Counter
from pathlib import Path

# Create a test file with known content.
# The sample contains docstrings, code, a comment line and blank lines.
test_content = '''"""Test module."""

def function_one():
    """Function one."""
    x = 1
    y = 2
    return x + y

class MyClass:
    """A class."""

    def method(self):
        """Method."""
        # Comment line
        pass

if __name__ == "__main__":
    print("Hello")
'''

# tempfile.mktemp() only invents a name, which leaves a window in which another
# process can create that path first. NamedTemporaryFile creates the file
# itself; delete=False keeps it on disk after the `with` block so that it can
# be handed to an external process, which means it still has to be removed
# explicitly once the script is done with it.
with tempfile.NamedTemporaryFile(
    mode="w", suffix=".py", encoding="utf-8", delete=False
) as sample:
    sample.write(test_content)
temp_file = Path(sample.name)

print(f"Test file: {temp_file}")
# MD5 is used purely as a content fingerprint here, not for security.
print(f"Content hash: {hashlib.md5(test_content.encode()).hexdigest()}")
print()

# Run pygount 10 times and collect EXACT JSON output.
# Each entry of `results` holds the 1-based run number, the raw stdout text and
# the individual counters read from the first file entry of the parsed JSON.
results = []
for i in range(10):
    try:
        proc = subprocess.run(
            ["pygount", "--format", "json", str(temp_file)],
            capture_output=True,
            text=True,
            check=True,
        )

        # Parse JSON
        data = json.loads(proc.stdout)

        # Extract the file entry: either the top level is a list of entries,
        # or it is an object carrying them under "files".
        item = data[0] if isinstance(data, list) else data["files"][0]
    except Exception:
        # A failing or missing pygount (check=True), unparsable output or an
        # unexpected JSON shape aborts the script before the cleanup step at
        # the end of the file is reached, so remove the temp file here and
        # let the original error propagate.
        if temp_file.exists():
            temp_file.unlink()
        raise

    record = {"run": i + 1, "raw_json": proc.stdout}
    for field in (
        "lineCount",
        "sourceCount",
        "codeCount",
        "documentationCount",
        "emptyCount",
        "stringCount",
        "language",
    ):
        # .get() leaves a field as None when this pygount version omits it.
        record[field] = item.get(field)
    results.append(record)

    print(
        f"Run {i+1}: code={item.get('codeCount')}, doc={item.get('documentationCount')}, "
        f"empty={item.get('emptyCount')}, source={item.get('sourceCount')}"
    )

# Check if ALL runs produced IDENTICAL results.
# Every later run is compared against run 1 on the counters listed below;
# `all_identical` ends up False as soon as any of them differs.
_banner = "=" * 70
print()
print(_banner)
print("CHECKING FOR DIFFERENCES")
print(_banner)

_compared_fields = (
    "lineCount",
    "sourceCount",
    "codeCount",
    "documentationCount",
    "emptyCount",
)


def _format_counts(run_number, counts):
    """Return the one-line counter summary printed for a mismatching run."""
    return (
        f" Run {run_number}: lineCount={counts['lineCount']}, sourceCount={counts['sourceCount']}, "
        f"codeCount={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}"
    )


first = results[0]
all_identical = True

# Run numbers are 1-based, so the second collected result is run 2.
for i, result in enumerate(results[1:], start=2):
    differs = any(result[name] != first[name] for name in _compared_fields)
    if not differs:
        continue

    print(f"\n❌ DIFFERENCE FOUND in run {i}:")
    print(_format_counts(1, first))
    print(_format_counts(i, result))
    all_identical = False

# Check if raw JSON is byte-for-byte identical.
# Each run's stdout is fingerprinted with MD5 (content fingerprint only, not a
# security measure) and the distinct fingerprints are tallied.
json_hashes = [hashlib.md5(r["raw_json"].encode()).hexdigest() for r in results]
unique_hashes = set(json_hashes)
# Counter remembers first-seen order, so the report below is printed in run
# order instead of in the arbitrary iteration order of a set of strings.
hash_counts = Counter(json_hashes)

print()
if len(unique_hashes) == 1:
    print("✅ All JSON outputs are BYTE-FOR-BYTE IDENTICAL")
    print(f" JSON hash: {json_hashes[0]}")
else:
    print(f"❌ Found {len(unique_hashes)} DIFFERENT JSON outputs:")
    for h, count in hash_counts.items():
        print(f" Hash {h[:8]}... appeared {count} times")

    # Show first difference in detail: run 1 next to the earliest run whose
    # output hashes differently.
    print("\n First two different outputs:")
    for i, result in enumerate(results):
        if json_hashes[i] != json_hashes[0]:
            print(f"\n Run 1 JSON:\n{results[0]['raw_json']}")
            print(f"\n Run {i+1} JSON:\n{result['raw_json']}")
            break

# Report the verdict derived from the numeric comparison (`all_identical`).
print()
if all_identical:
    verdict_lines = [
        "🎉 CONCLUSION: pygount IS deterministic (all numeric values identical)",
    ]
else:
    verdict_lines = [
        "⚠️ CONCLUSION: pygount is NOT deterministic (values differ between runs)",
        " → This tool cannot be used for reliable baseline comparison",
        " → Recommend switching to a deterministic counting library",
    ]
for verdict_line in verdict_lines:
    print(verdict_line)

# Cleanup: remove the temporary sample file.
temp_file.unlink()