211 lines
6.4 KiB
Python
211 lines
6.4 KiB
Python
"""Test pygount determinism with REAL project files."""
|
|
|
|
import subprocess
|
|
import json
|
|
import hashlib
|
|
from pathlib import Path
|
|
import tempfile
|
|
import time
|
|
import shutil
|
|
|
|
# Real project directory whose Python files are analysed.
source_dir = Path(r"C:\src\____GitProjects\S1005403_RisCC\target_simulator")

# The rest of the script needs a directory to search, so a regular file at
# this path is rejected as well.
if not source_dir.is_dir():
    print(f"ERROR: Directory not found: {source_dir}")
    # SystemExit is raised directly: the exit() helper is injected by the
    # `site` module for interactive use and is not available in every runtime.
    raise SystemExit(1)

# Get all Python files. The result is sorted because directory traversal
# order is not guaranteed, and the subset picked below should be the same
# on every execution of this script.
py_files = sorted(source_dir.rglob("*.py"))
print(f"Found {len(py_files)} Python files in {source_dir}")
print()

if not py_files:
    print("ERROR: No Python files found!")
    raise SystemExit(1)

# Limit to first 15 files for testing (to keep test reasonable)
test_files = py_files[:15]
|
|
|
|
|
|
def run_pygount_on_file(file_path):
    """Count the lines of one file by invoking the ``pygount`` command.

    Every call starts a new ``pygount --format json`` process; this function
    keeps no state between calls.

    Args:
        file_path: Path of the file to analyse; handed to pygount as
            ``str(file_path)``.

    Returns:
        A dict with the keys ``lineCount``, ``sourceCount``, ``codeCount``,
        ``documentationCount``, ``emptyCount`` and ``language``, read from the
        first file entry of the JSON output. A key that is absent from that
        entry maps to ``None``.

    Raises:
        subprocess.CalledProcessError: If pygount exits with a non-zero status
            (``check=True``).
    """
    command = ["pygount", "--format", "json", str(file_path)]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    report = json.loads(completed.stdout)

    # Two JSON shapes are accepted: a bare list of file entries, or an object
    # that stores the entries under "files". Only the first entry is used.
    entries = report if isinstance(report, list) else report["files"]
    first_entry = entries[0]

    wanted_keys = (
        "lineCount",
        "sourceCount",
        "codeCount",
        "documentationCount",
        "emptyCount",
        "language",
    )
    return {key: first_entry.get(key) for key in wanted_keys}
|
|
|
|
|
|
# Fields that must match across all four runs for a file to count as consistent.
_COMPARED_FIELDS = ("lineCount", "codeCount", "documentationCount", "emptyCount")


def _relative_key(file_path):
    """Return *file_path* relative to ``source_dir`` as a POSIX-style string.

    The relative path (not the bare file name) identifies a file in every
    result table, so same-named files from different directories, such as
    several ``__init__.py``, do not overwrite each other.
    """
    return file_path.relative_to(source_dir).as_posix()


def _analyze(paths_by_key):
    """Run pygount on each path, print one summary line per file, return the counts.

    Args:
        paths_by_key: Mapping of result key to the path that is analysed.

    Returns:
        Dict mapping each key to the dict returned by ``run_pygount_on_file``.
    """
    results = {}
    for key, path in paths_by_key.items():
        counts = run_pygount_on_file(path)
        results[key] = counts
        print(
            f" {key}: code={counts['codeCount']}, "
            f"doc={counts['documentationCount']}, empty={counts['emptyCount']}"
        )
    print()
    return results


def _format_count(value):
    """Right-align a count in four columns.

    ``run_pygount_on_file`` yields ``None`` for a count pygount did not
    report; ``:4d`` would raise ``TypeError`` on it, so non-integers are
    rendered through ``str`` instead.
    """
    return f"{value:4d}" if isinstance(value, int) else f"{value!s:>4}"


def _format_run(label, counts):
    """Build the detail line that shows the counts of one run."""
    return (
        f" {label}: code={_format_count(counts['codeCount'])}, "
        f"doc={_format_count(counts['documentationCount'])}, "
        f"empty={_format_count(counts['emptyCount'])}, "
        f"total={_format_count(counts['lineCount'])}"
    )


print("=" * 80)
print("REAL PROJECT TEST - NO CACHE")
print("=" * 80)
print(f"Testing {len(test_files)} files from real project")
print()

# Result key -> file in its original location.
original_paths = {_relative_key(path): path for path in test_files}

# Calculate content hashes (shown in the report when a file's counts differ).
file_hashes = {
    key: hashlib.md5(path.read_bytes()).hexdigest()
    for key, path in original_paths.items()
}

# RUN 1: Analyze files in original location
print("RUN 1: Analyzing files in ORIGINAL location")
print("-" * 80)
run1_results = _analyze(original_paths)

# Wait 10 seconds
print("Waiting 10 seconds...")
time.sleep(10)
print()

# RUN 2: Re-analyze SAME files (same location)
print("RUN 2: Re-analyzing SAME files (same location, 10s later)")
print("-" * 80)
run2_results = _analyze(original_paths)

# Copy files to temporary location. The copy is only needed for Run 3, so it
# is removed in `finally` right after that run, even if copying or pygount fails.
temp_dir = Path(tempfile.mkdtemp(prefix="test_copy_"))
try:
    print(f"Copying files to: {temp_dir}")
    copied_paths = {}
    for key, path in original_paths.items():
        # Keep the directory layout below temp_dir so that files sharing a
        # name cannot overwrite each other's copy.
        dest = temp_dir / key
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, dest)
        copied_paths[key] = dest
    print("Files copied")
    print()

    # Wait 5 seconds
    print("Waiting 5 seconds...")
    time.sleep(5)
    print()

    # RUN 3: Analyze files in NEW location
    print("RUN 3: Analyzing files in NEW location (different path, same content)")
    print("-" * 80)
    run3_results = _analyze(copied_paths)
finally:
    # Cleanup is best effort: failing to delete the scratch copy must not
    # hide an error raised above or abort the remaining runs.
    shutil.rmtree(temp_dir, ignore_errors=True)

# Wait 10 seconds
print("Waiting 10 seconds...")
time.sleep(10)
print()

# RUN 4: Re-analyze original location again. The scripted waits add up to
# 10 + 5 + 10 = 25 seconds since Run 1.
print("RUN 4: Re-analyzing ORIGINAL location again (25s after Run 1)")
print("-" * 80)
run4_results = _analyze(original_paths)

# ANALYSIS
print("=" * 80)
print("DETAILED ANALYSIS")
print("=" * 80)
print()

differences_found = False
for filename in original_paths:
    r1 = run1_results[filename]
    r2 = run2_results[filename]
    r3 = run3_results[filename]
    r4 = run4_results[filename]

    # A file is consistent when every compared count is the same in all runs.
    consistent = all(
        run[field] == r1[field]
        for field in _COMPARED_FIELDS
        for run in (r2, r3, r4)
    )
    if consistent:
        print(f"✅ {filename} - CONSISTENT across all runs")
        continue

    differences_found = True
    print(f"\n❌ {filename} - DIFFERENCES FOUND!")
    print(f" Content hash: {file_hashes[filename]}")
    print(_format_run("Run 1 (original)", r1))
    print(_format_run("Run 2 (same, 10s)", r2))
    print(_format_run("Run 3 (copy, 15s)", r3))
    print(_format_run("Run 4 (original, 25s)", r4))

    # Show which pairs differ
    if r1 != r2:
        print(" ⚠️ Run 1 ≠ Run 2 (same location, different time)")
    if r1 != r3:
        print(" ⚠️ Run 1 ≠ Run 3 (different location)")
    if r1 != r4:
        print(" ⚠️ Run 1 ≠ Run 4 (same location, much later)")

print()
print("=" * 80)
print("FINAL CONCLUSION")
print("=" * 80)
if not differences_found:
    print("✅ SUCCESS: All files produced IDENTICAL results across all runs")
    print(" → pygount IS deterministic with real project files")
else:
    print("❌ FAILURE: Some files produced DIFFERENT results!")
    print(" → pygount is NOT deterministic with these real files")
    print(" → This explains why baseline comparison shows differences")
    print(" → SOLUTION NEEDED: Either fix pygount usage or find alternative")
|