183 lines
6.2 KiB
Python
183 lines
6.2 KiB
Python
"""Test pygount determinism with REAL project files."""
|
|
import subprocess
|
|
import json
|
|
import hashlib
|
|
from pathlib import Path
|
|
import tempfile
|
|
import time
|
|
import shutil
|
|
|
|
# Real project directory.
# NOTE(review): machine-specific absolute path - the script only runs where
# this checkout exists.
source_dir = Path(r"C:\src\____GitProjects\S1005403_RisCC\target_simulator")

# is_dir() rather than exists(): a regular file at this path would pass an
# existence check and then silently yield zero matches from rglob below.
if not source_dir.is_dir():
    print(f"ERROR: Directory not found: {source_dir}")
    # The bare ``exit`` helper is installed by the ``site`` module for
    # interactive sessions and is absent under ``python -S`` or in frozen
    # builds; raising SystemExit works everywhere.
    raise SystemExit(1)

# Get all Python files.  The traversal order of rglob follows the file system
# and is not guaranteed to be stable, so the result is sorted; otherwise the
# "first 15" slice below could select a different sample on each invocation,
# which would undermine a determinism test.  Directories that merely happen
# to be named "*.py" are skipped because they cannot be opened as files.
py_files = sorted(path for path in source_dir.rglob("*.py") if path.is_file())
print(f"Found {len(py_files)} Python files in {source_dir}")
print()

if not py_files:
    print("ERROR: No Python files found!")
    raise SystemExit(1)

# Limit to first 15 files for testing (to keep test reasonable)
test_files = py_files[:15]
|
|
|
|
def run_pygount_on_file(file_path):
    """Run pygount on a single file and return the counts it reports.

    Every call starts a new ``pygount --format json`` process, so no state is
    carried over inside this script from one call to the next.

    Args:
        file_path: Path of the file to analyze; converted with ``str()``
            before being passed on the command line.

    Returns:
        dict: The keys ``lineCount``, ``sourceCount``, ``codeCount``,
        ``documentationCount``, ``emptyCount`` and ``language`` taken from
        the first file entry of the report.  A key that is absent from the
        entry maps to ``None``.

    Raises:
        subprocess.CalledProcessError: pygount exited with a non-zero status.
        json.JSONDecodeError: pygount's stdout was not valid JSON.
        ValueError: The report did not contain any per-file entry.
    """
    wanted_keys = (
        'lineCount',
        'sourceCount',
        'codeCount',
        'documentationCount',
        'emptyCount',
        'language',
    )

    proc = subprocess.run(
        ['pygount', '--format', 'json', str(file_path)],
        capture_output=True,
        text=True,
        check=True,
    )
    data = json.loads(proc.stdout)

    # Two report layouts are accepted: a bare list of per-file entries, or an
    # object that carries the entries under the "files" key.
    if isinstance(data, list):
        entries = data
    elif isinstance(data, dict):
        entries = data.get('files')
    else:
        entries = None

    # Fail with a message naming the file instead of an opaque
    # IndexError/KeyError when the report is empty or has an unknown shape.
    if not isinstance(entries, list) or not entries or not isinstance(entries[0], dict):
        raise ValueError(f"pygount reported no file entry for {file_path}")

    item = entries[0]
    return {key: item.get(key) for key in wanted_keys}
|
|
|
|
# ---------------------------------------------------------------------------
# Experiment: analyze the same files four times - twice in place, once from a
# copy in a temporary directory, once more in place - and check that pygount
# reports identical results every time.
# ---------------------------------------------------------------------------

# Pauses (in seconds) between the runs.  The run labels below are derived
# from these values so the printed timings cannot drift from the real waits.
_WAIT_BEFORE_RUN2 = 10
_WAIT_AFTER_COPY = 5
_WAIT_BEFORE_RUN4 = 10
_RUN3_OFFSET = _WAIT_BEFORE_RUN2 + _WAIT_AFTER_COPY
_RUN4_OFFSET = _RUN3_OFFSET + _WAIT_BEFORE_RUN4


def _analyze_files(targets):
    """Run pygount on each ``(key, path)`` pair and return ``{key: counts}``.

    One summary line per file is printed, followed by a blank line.
    """
    results = {}
    for key, path in targets:
        counts = run_pygount_on_file(path)
        results[key] = counts
        print(f" {key}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}")
    print()
    return results


def _pause(seconds):
    """Announce a pause, sleep for *seconds*, then print a blank line."""
    print(f"Waiting {seconds} seconds...")
    time.sleep(seconds)
    print()


def _format_counts(counts):
    """Render one result dict for the difference report.

    Integer values are right-aligned in four columns; anything else (for
    example ``None`` for a field pygount did not report) is shown via
    ``str()`` instead of raising a formatting error.
    """
    def cell(key):
        value = counts.get(key)
        return f"{value:4d}" if isinstance(value, int) else f"{value!s:>4}"

    return (
        f"code={cell('codeCount')}, doc={cell('documentationCount')}, "
        f"empty={cell('emptyCount')}, source={cell('sourceCount')}, "
        f"total={cell('lineCount')}, lang={counts.get('language')}"
    )


print("=" * 80)
print("REAL PROJECT TEST - NO CACHE")
print("=" * 80)
print(f"Testing {len(test_files)} files from real project")
print()

# Bare file names are not unique inside a project tree (several packages can
# each have an ``__init__.py``), so everything is keyed by the path relative
# to source_dir.  Keying by name would let same-named files overwrite each
# other, both in the result dicts and in the temporary copy.
originals = [
    (file_path.relative_to(source_dir).as_posix(), file_path)
    for file_path in test_files
]

# Calculate content hashes; they are only printed for files that turn out to
# be inconsistent, to identify exactly which content was analyzed.
file_hashes = {
    key: hashlib.md5(file_path.read_bytes()).hexdigest()
    for key, file_path in originals
}

# RUN 1: Analyze files in original location
print("RUN 1: Analyzing files in ORIGINAL location")
print("-" * 80)
run1_results = _analyze_files(originals)

_pause(_WAIT_BEFORE_RUN2)

# RUN 2: Re-analyze SAME files (same location)
print(f"RUN 2: Re-analyzing SAME files (same location, {_WAIT_BEFORE_RUN2}s later)")
print("-" * 80)
run2_results = _analyze_files(originals)

# Copy files to a temporary location.  The context manager removes the
# directory even if copying or pygount raises, so a failed run does not leave
# the copies behind.
with tempfile.TemporaryDirectory(prefix="test_copy_") as temp_name:
    temp_dir = Path(temp_name)
    print(f"Copying files to: {temp_dir}")
    copies = []
    for key, file_path in originals:
        dest = temp_dir / key
        # Recreate the relative directory layout so same-named files from
        # different packages do not collide.
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(file_path, dest)
        copies.append((key, dest))
    print("Files copied")
    print()

    _pause(_WAIT_AFTER_COPY)

    # RUN 3: Analyze files in NEW location
    print("RUN 3: Analyzing files in NEW location (different path, same content)")
    print("-" * 80)
    run3_results = _analyze_files(copies)

_pause(_WAIT_BEFORE_RUN4)

# RUN 4: Re-analyze original location again
print(f"RUN 4: Re-analyzing ORIGINAL location again ({_RUN4_OFFSET}s after Run 1)")
print("-" * 80)
run4_results = _analyze_files(originals)

# ANALYSIS
print("=" * 80)
print("DETAILED ANALYSIS")
print("=" * 80)
print()

runs = (
    ("Run 1 (original):", run1_results),
    (f"Run 2 (same, {_WAIT_BEFORE_RUN2}s):", run2_results),
    (f"Run 3 (copy, {_RUN3_OFFSET}s):", run3_results),
    (f"Run 4 (original, {_RUN4_OFFSET}s):", run4_results),
)
label_width = max(len(label) for label, _ in runs)

differences_found = False
for key, _file_path in originals:
    r1, r2, r3, r4 = (results[key] for _, results in runs)

    # The verdict compares the complete result dicts - the same comparison
    # the pair markers below use - so a file can never be reported as
    # consistent while two of its runs are flagged as different.
    if r1 == r2 == r3 == r4:
        print(f"✅ {key} - CONSISTENT across all runs")
        continue

    differences_found = True
    print(f"\n❌ {key} - DIFFERENCES FOUND!")
    print(f" Content hash: {file_hashes[key]}")
    for label, results in runs:
        print(f" {label.ljust(label_width)} {_format_counts(results[key])}")

    # Show which pairs differ.  Comparing everything against run 1 is enough:
    # if runs 2-4 all equal run 1 they also equal each other.
    if r1 != r2:
        print(" ⚠️ Run 1 ≠ Run 2 (same location, different time)")
    if r1 != r3:
        print(" ⚠️ Run 1 ≠ Run 3 (different location)")
    if r1 != r4:
        print(" ⚠️ Run 1 ≠ Run 4 (same location, much later)")

print()
print("=" * 80)
print("FINAL CONCLUSION")
print("=" * 80)
if not differences_found:
    print("✅ SUCCESS: All files produced IDENTICAL results across all runs")
    print(" → pygount IS deterministic with real project files")
else:
    print("❌ FAILURE: Some files produced DIFFERENT results!")
    print(" → pygount is NOT deterministic with these real files")
    print(" → This explains why baseline comparison shows differences")
    print(" → SOLUTION NEEDED: Either fix pygount usage or find alternative")
|