"""Test pygount determinism with REAL project files.

The script runs ``pygount`` four times over the same sample of Python files:
twice in the original location, once on copies placed in a temporary
directory, and once more in the original location. It then reports whether
any of the line counts differ between the runs.

Files are identified by their path relative to ``source_dir`` (not by their
bare name), so that several files sharing a name in different packages
(for example ``__init__.py``) are tracked and copied independently.
"""

import hashlib
import json
import shutil
import subprocess
import sys
import tempfile
import time
from pathlib import Path

# Real project directory
source_dir = Path(r"C:\src\____GitProjects\S1005403_RisCC\target_simulator")

# Limit to first 15 files for testing (to keep test reasonable)
MAX_TEST_FILES = 15

# Fields copied from each pygount JSON file entry.
COUNT_KEYS = (
    "lineCount",
    "sourceCount",
    "codeCount",
    "documentationCount",
    "emptyCount",
    "language",
)

# Fields that must be identical across runs for a file to be "consistent".
COMPARED_KEYS = ("lineCount", "codeCount", "documentationCount", "emptyCount")


def relative_key(file_path, root):
    """Return *file_path* relative to *root* as a forward-slash string.

    Used as the dictionary key for a file so that same-named files in
    different sub-directories do not collide.
    """
    return file_path.relative_to(root).as_posix()


def extract_counts(data):
    """Pick the count fields of the first file entry from parsed pygount JSON.

    Args:
        data: Parsed JSON output; either a list of file entries or a dict
            holding the entries under the ``"files"`` key.

    Returns:
        A dict with the keys in ``COUNT_KEYS``; a field missing from the
        entry is reported as ``None``.
    """
    item = data[0] if isinstance(data, list) else data["files"][0]
    return {key: item.get(key) for key in COUNT_KEYS}


def run_pygount_on_file(file_path):
    """Run pygount and extract counts - NO CACHE.

    Args:
        file_path: Path of the single file to analyse.

    Returns:
        The dict produced by ``extract_counts`` for that file.

    Raises:
        subprocess.CalledProcessError: If pygount exits with a non-zero code.
    """
    proc = subprocess.run(
        ["pygount", "--format", "json", str(file_path)],
        capture_output=True,
        text=True,
        check=True,
    )
    return extract_counts(json.loads(proc.stdout))


def counts_match(first, *others):
    """Return True when every run agrees with *first* on all COMPARED_KEYS."""
    return all(
        other[key] == first[key] for other in others for key in COMPARED_KEYS
    )


def format_count(value):
    """Format one count right-aligned in 4 columns.

    Non-integer values (e.g. ``None`` for a missing field) are rendered via
    ``str`` instead of raising, so the difference report can always print.
    """
    if isinstance(value, int):
        return f"{value:4d}"
    return f"{str(value):>4}"


def format_counts(counts):
    """Return the ``code=…, doc=…, empty=…, total=…`` detail text for a run."""
    return (
        f"code={format_count(counts['codeCount'])}, "
        f"doc={format_count(counts['documentationCount'])}, "
        f"empty={format_count(counts['emptyCount'])}, "
        f"total={format_count(counts['lineCount'])}"
    )


def run_pass(title, paths_by_key):
    """Analyse every file of one run, printing a line per file.

    Args:
        title: Banner printed before the run.
        paths_by_key: Mapping of file key -> path to analyse in this run.

    Returns:
        Mapping of file key -> counts dict for this run.
    """
    print(title)
    print("-" * 80)
    results = {}
    for key, path in paths_by_key.items():
        counts = run_pygount_on_file(path)
        results[key] = counts
        print(
            f" {key}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}"
        )
    print()
    return results


def pause(seconds):
    """Announce and perform a wait of *seconds* between runs."""
    print(f"Waiting {seconds} seconds...")
    time.sleep(seconds)
    print()


def copy_files(paths_by_key, dest_root):
    """Copy each file below *dest_root*, mirroring its relative path.

    Returns:
        Mapping of file key -> path of the copy.
    """
    copies = {}
    for key, path in paths_by_key.items():
        dest = dest_root / key
        # Recreate the sub-directory so same-named files never overwrite
        # each other in the copy.
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(path, dest)
        copies[key] = dest
    return copies


def report(file_hashes, run1, run2, run3, run4):
    """Print the per-file comparison of the four runs.

    Returns:
        True when at least one file produced different counts in some run.
    """
    differences_found = False
    for key, file_hash in file_hashes.items():
        r1, r2, r3, r4 = run1[key], run2[key], run3[key], run4[key]

        # Check if all numeric values are identical
        if counts_match(r1, r2, r3, r4):
            print(f"✅ {key} - CONSISTENT across all runs")
            continue

        print(f"\n❌ {key} - DIFFERENCES FOUND!")
        print(f" Content hash: {file_hash}")
        print(f" Run 1 (original): {format_counts(r1)}")
        print(f" Run 2 (same, 10s): {format_counts(r2)}")
        print(f" Run 3 (copy, 15s): {format_counts(r3)}")
        print(f" Run 4 (original, 25s): {format_counts(r4)}")

        # Show which pairs differ
        if not counts_match(r1, r2):
            print(" ⚠️ Run 1 ≠ Run 2 (same location, different time)")
        if not counts_match(r1, r3):
            print(" ⚠️ Run 1 ≠ Run 3 (different location)")
        if not counts_match(r1, r4):
            print(" ⚠️ Run 1 ≠ Run 4 (same location, much later)")
        differences_found = True
    return differences_found


def main():
    """Run the four-pass determinism check and print the conclusion.

    Returns:
        Process exit code: 1 when the project directory or its Python files
        cannot be found, 0 otherwise (including when differences are found).
    """
    if not source_dir.exists():
        print(f"ERROR: Directory not found: {source_dir}")
        return 1

    # Get all Python files; sorted so the tested sample is the same each time.
    py_files = sorted(p for p in source_dir.rglob("*.py") if p.is_file())
    print(f"Found {len(py_files)} Python files in {source_dir}")
    print()

    if not py_files:
        print("ERROR: No Python files found!")
        return 1

    originals = {
        relative_key(path, source_dir): path
        for path in py_files[:MAX_TEST_FILES]
    }

    print("=" * 80)
    print("REAL PROJECT TEST - NO CACHE")
    print("=" * 80)
    print(f"Testing {len(originals)} files from real project")
    print()

    # Calculate content hashes
    file_hashes = {
        key: hashlib.md5(path.read_bytes()).hexdigest()
        for key, path in originals.items()
    }

    # The context manager removes the copies even if a pygount call fails.
    with tempfile.TemporaryDirectory(prefix="test_copy_") as temp_name:
        temp_dir = Path(temp_name)

        run1 = run_pass("RUN 1: Analyzing files in ORIGINAL location", originals)
        pause(10)

        run2 = run_pass(
            "RUN 2: Re-analyzing SAME files (same location, 10s later)",
            originals,
        )

        # Copy files to temporary location
        print(f"Copying files to: {temp_dir}")
        copies = copy_files(originals, temp_dir)
        print("Files copied")
        print()
        pause(5)

        run3 = run_pass(
            "RUN 3: Analyzing files in NEW location (different path, same content)",
            copies,
        )
        pause(10)

        # Waits add up to 10 + 5 + 10 = 25 seconds since Run 1.
        run4 = run_pass(
            "RUN 4: Re-analyzing ORIGINAL location again (25s after Run 1)",
            originals,
        )

    # ANALYSIS
    print("=" * 80)
    print("DETAILED ANALYSIS")
    print("=" * 80)
    print()
    differences_found = report(file_hashes, run1, run2, run3, run4)

    print()
    print("=" * 80)
    print("FINAL CONCLUSION")
    print("=" * 80)
    if not differences_found:
        print("✅ SUCCESS: All files produced IDENTICAL results across all runs")
        print(" → pygount IS deterministic with real project files")
    else:
        print("❌ FAILURE: Some files produced DIFFERENT results!")
        print(" → pygount is NOT deterministic with these real files")
        print(" → This explains why baseline comparison shows differences")
        print(" → SOLUTION NEEDED: Either fix pygount usage or find alternative")
    return 0


if __name__ == "__main__":
    sys.exit(main())