"""Rigorous test: multiple files, multiple runs, different locations, with time delays.""" import subprocess import json import hashlib from pathlib import Path import tempfile import time import shutil # Create 10 test files with varied content test_files_content = { "module1.py": '''"""Module 1.""" import os import sys def function_a(): """Function A.""" x = 10 y = 20 return x + y class ClassA: """Class A.""" def method1(self): pass ''', "module2.py": '''"""Module 2.""" def function_b(): # This is a comment result = 100 return result def function_c(): """Another function.""" for i in range(10): print(i) ''', "module3.py": '''"""Module 3 with docstrings.""" class MyClass: """A class with methods.""" def __init__(self): """Initialize.""" self.value = 0 def get_value(self): """Get value.""" return self.value def set_value(self, val): """Set value.""" self.value = val ''', "module4.py": """# Simple module x = 1 y = 2 z = x + y """, "module5.py": '''"""Complex module.""" def complex_function(): """Complex function with multiple statements.""" # Comment 1 a = 1 # Comment 2 b = 2 # Comment 3 c = 3 result = a + b + c return result ''', "module6.py": """import json import os data = {"key": "value"} print(json.dumps(data)) """, "module7.py": '''"""Module with many comments.""" # Comment line 1 # Comment line 2 # Comment line 3 def func(): """Docstring.""" # Inline comment return True ''', "module8.py": """class Empty: pass class AnotherEmpty: pass """, "module9.py": '''"""Docstring only module.""" ''', "module10.py": """# Many blank lines def func(): pass # More blanks """, } def run_pygount_on_file(file_path): """Run pygount and extract counts.""" proc = subprocess.run( ["pygount", "--format", "json", str(file_path)], capture_output=True, text=True, check=True, ) data = json.loads(proc.stdout) if isinstance(data, list): item = data[0] else: item = data["files"][0] return { "lineCount": item.get("lineCount"), "sourceCount": item.get("sourceCount"), "codeCount": item.get("codeCount"), "documentationCount": item.get("documentationCount"), "emptyCount": item.get("emptyCount"), } print("=" * 80) print("RIGOROUS PYGOUNT DETERMINISM TEST") print("=" * 80) print("Testing: 10 files, 4 runs each, 2 locations, with time delays") print() # Create temp directory 1 temp_dir1 = Path(tempfile.mkdtemp(prefix="test_loc1_")) print(f"Location 1: {temp_dir1}") # Write files to location 1 for filename, content in test_files_content.items(): (temp_dir1 / filename).write_text(content, encoding="utf-8") # Calculate content hashes file_hashes = {} for filename, content in test_files_content.items(): file_hashes[filename] = hashlib.md5(content.encode()).hexdigest() print(f"Created {len(test_files_content)} test files") print() # Store all results all_results = {} # Run 1: First analysis in location 1 print("RUN 1: Analyzing files in location 1 (first time)") print("-" * 80) time.sleep(2) # Small delay run1_results = {} for filename in test_files_content: file_path = temp_dir1 / filename counts = run_pygount_on_file(file_path) run1_results[filename] = counts print( f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}" ) all_results["run1"] = run1_results print() # Run 2: Second analysis in SAME location after delay print("RUN 2: Re-analyzing SAME files after 10 second delay") print("-" * 80) time.sleep(10) # Wait 10 seconds run2_results = {} for filename in test_files_content: file_path = temp_dir1 / filename counts = run_pygount_on_file(file_path) run2_results[filename] = counts print( f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}" ) all_results["run2"] = run2_results print() # Copy files to location 2 temp_dir2 = Path(tempfile.mkdtemp(prefix="test_loc2_")) print(f"Location 2: {temp_dir2}") for filename in test_files_content: shutil.copy2(temp_dir1 / filename, temp_dir2 / filename) print("Files copied to location 2") print() # Run 3: Analysis in NEW location print("RUN 3: Analyzing files in location 2 (different path, same content)") print("-" * 80) time.sleep(5) # Wait 5 seconds run3_results = {} for filename in test_files_content: file_path = temp_dir2 / filename counts = run_pygount_on_file(file_path) run3_results[filename] = counts print( f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}" ) all_results["run3"] = run3_results print() # Run 4: Final analysis back in location 1 print("RUN 4: Re-analyzing files in location 1 again (after 10 more seconds)") print("-" * 80) time.sleep(10) # Wait 10 more seconds run4_results = {} for filename in test_files_content: file_path = temp_dir1 / filename counts = run_pygount_on_file(file_path) run4_results[filename] = counts print( f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}" ) all_results["run4"] = run4_results print() # Analyze results print("=" * 80) print("ANALYSIS: Checking for differences across runs") print("=" * 80) differences_found = False for filename in test_files_content: file_hash = file_hashes[filename] # Get results from all runs r1 = all_results["run1"][filename] r2 = all_results["run2"][filename] r3 = all_results["run3"][filename] r4 = all_results["run4"][filename] # Check if all runs produced identical results if r1 == r2 == r3 == r4: continue # All identical, good else: print(f"\n❌ DIFFERENCE FOUND in {filename} (hash: {file_hash[:8]})") print( f" Run 1: code={r1['codeCount']}, doc={r1['documentationCount']}, empty={r1['emptyCount']}" ) print( f" Run 2: code={r2['codeCount']}, doc={r2['documentationCount']}, empty={r2['emptyCount']}" ) print( f" Run 3: code={r3['codeCount']}, doc={r3['documentationCount']}, empty={r3['emptyCount']}" ) print( f" Run 4: code={r4['codeCount']}, doc={r4['documentationCount']}, empty={r4['emptyCount']}" ) differences_found = True print() if not differences_found: print("✅ SUCCESS: All 10 files produced IDENTICAL results across all 4 runs!") print(" - Same results in same location (run 1 vs run 2)") print(" - Same results in different location (run 2 vs run 3)") print(" - Same results after time delays (all runs)") print() print("CONCLUSION: pygount IS deterministic") print(" → Same file content = same numeric results") print(" → Independent of: file path, time, previous runs") else: print("❌ FAILURE: pygount produced DIFFERENT results for identical content!") print() print("CONCLUSION: pygount is NOT deterministic") print(" → Cannot be reliably used for baseline comparison") print(" → Need alternative counting solution") # Cleanup shutil.rmtree(temp_dir1) shutil.rmtree(temp_dir2)