# SXXXXXXX_PyUCC/tests/test_real_project_files.py
# (211 lines, 6.4 KiB, Python)

"""Test pygount determinism with REAL project files."""
import subprocess
import json
import hashlib
from pathlib import Path
import tempfile
import time
import shutil
# Real project directory whose files are analyzed by this test.
source_dir = Path(r"C:\src\____GitProjects\S1005403_RisCC\target_simulator")

if not source_dir.exists():
    print(f"ERROR: Directory not found: {source_dir}")
    raise SystemExit(1)

# Gather every Python source file below the project root (recursive).
py_files = list(source_dir.rglob("*.py"))
print(f"Found {len(py_files)} Python files in {source_dir}")
print()

if not py_files:
    print("ERROR: No Python files found!")
    raise SystemExit(1)

# Cap the sample at the first 15 files to keep the test run short.
test_files = py_files[:15]
def run_pygount_on_file(file_path):
    """Run pygount on one file and return its count summary - NO CACHE.

    A fresh ``pygount --format json`` process is spawned on every call,
    so no in-process caching layer can influence the numbers.
    """
    completed = subprocess.run(
        ["pygount", "--format", "json", str(file_path)],
        capture_output=True,
        text=True,
        check=True,
    )
    payload = json.loads(completed.stdout)
    # pygount emits either a bare list of entries or an object holding a
    # "files" list (varies by version); accept both shapes.
    entry = payload[0] if isinstance(payload, list) else payload["files"][0]
    wanted = (
        "lineCount",
        "sourceCount",
        "codeCount",
        "documentationCount",
        "emptyCount",
        "language",
    )
    return {key: entry.get(key) for key in wanted}
print("=" * 80)
print("REAL PROJECT TEST - NO CACHE")
print("=" * 80)
print(f"Testing {len(test_files)} files from real project")
print()

# Record an MD5 content hash per file so the later analysis can show that
# any count differences are NOT caused by the file content changing.
# (pathlib read_bytes replaces the manual open/read pair.)
file_hashes = {
    file_path.name: hashlib.md5(file_path.read_bytes()).hexdigest()
    for file_path in test_files
}
def _analyze_files(paths):
    """Run pygount over *paths*; print and return {filename: counts}.

    Factors out the loop that was previously duplicated verbatim for all
    four runs. Results are keyed by file name (not full path) so runs on
    the original and on the copied location can be compared directly.
    """
    results = {}
    for path in paths:
        counts = run_pygount_on_file(path)
        results[path.name] = counts
        print(
            f" {path.name}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}"
        )
    return results

# RUN 1: Analyze files in original location
print("RUN 1: Analyzing files in ORIGINAL location")
print("-" * 80)
run1_results = _analyze_files(test_files)
print()

# Pause so that any time-based caching in pygount would be exposed.
print("Waiting 10 seconds...")
time.sleep(10)
print()

# RUN 2: Re-analyze the SAME files in the same location.
print("RUN 2: Re-analyzing SAME files (same location, 10s later)")
print("-" * 80)
run2_results = _analyze_files(test_files)
print()

# Copy files to a temporary location so RUN 3 sees identical content
# under a different path (path-keyed caching would show up here).
temp_dir = Path(tempfile.mkdtemp(prefix="test_copy_"))
print(f"Copying files to: {temp_dir}")
for file_path in test_files:
    shutil.copy2(file_path, temp_dir / file_path.name)
print("Files copied")
print()

print("Waiting 5 seconds...")
time.sleep(5)
print()

# RUN 3: Analyze the copies in the NEW location.
print("RUN 3: Analyzing files in NEW location (different path, same content)")
print("-" * 80)
run3_results = _analyze_files([temp_dir / file_path.name for file_path in test_files])
print()

print("Waiting 10 seconds...")
time.sleep(10)
print()

# RUN 4: Re-analyze the original location once more, well after RUN 1.
print("RUN 4: Re-analyzing ORIGINAL location again (20s after Run 1)")
print("-" * 80)
run4_results = _analyze_files(test_files)
print()
# ANALYSIS
print("=" * 80)
print("DETAILED ANALYSIS")
print("=" * 80)
print()

# Metrics that must agree across all four runs for a file to count as
# deterministic.
_METRICS = ("lineCount", "codeCount", "documentationCount", "emptyCount")

differences_found = False
for file_path in test_files:
    filename = file_path.name
    file_hash = file_hashes[filename]
    runs = (
        run1_results[filename],
        run2_results[filename],
        run3_results[filename],
        run4_results[filename],
    )
    r1, r2, r3, r4 = runs
    # Consistent when every metric takes exactly one value across runs.
    if all(len({r[metric] for r in runs}) == 1 for metric in _METRICS):
        # BUGFIX: these two messages were f-strings with no placeholder
        # that printed the literal "(unknown)" instead of the file name.
        print(f"{filename} - CONSISTENT across all runs")
    else:
        print(f"\n{filename} - DIFFERENCES FOUND!")
        print(f" Content hash: {file_hash}")
        for label, r in (
            ("Run 1 (original)", r1),
            ("Run 2 (same, 10s)", r2),
            ("Run 3 (copy, 15s)", r3),
            ("Run 4 (original, 25s)", r4),
        ):
            print(
                f" {label}: code={r['codeCount']:4d}, doc={r['documentationCount']:4d}, empty={r['emptyCount']:4d}, total={r['lineCount']:4d}"
            )
        # Pinpoint which run pairs disagree with the baseline run.
        if r1 != r2:
            print(f" ⚠️ Run 1 ≠ Run 2 (same location, different time)")
        if r1 != r3:
            print(f" ⚠️ Run 1 ≠ Run 3 (different location)")
        if r1 != r4:
            print(f" ⚠️ Run 1 ≠ Run 4 (same location, much later)")
        differences_found = True
print()
print("=" * 80)
print("FINAL CONCLUSION")
print("=" * 80)
if not differences_found:
    print("✅ SUCCESS: All files produced IDENTICAL results across all runs")
    print(" → pygount IS deterministic with real project files")
else:
    print("❌ FAILURE: Some files produced DIFFERENT results!")
    print(" → pygount is NOT deterministic with these real files")
    print(" → This explains why baseline comparison shows differences")
    print(" → SOLUTION NEEDED: Either fix pygount usage or find alternative")
# Best-effort cleanup: ignore delete errors (e.g. Windows file locks or
# antivirus holds) so a failed rmtree cannot mask the verdict above with
# a spurious traceback at the very end of the run.
shutil.rmtree(temp_dir, ignore_errors=True)