"""Rigorous test: multiple files, multiple runs, different locations, with time delays.""" import subprocess import json import hashlib from pathlib import Path import tempfile import time import shutil # Create 10 test files with varied content test_files_content = { 'module1.py': '''"""Module 1.""" import os import sys def function_a(): """Function A.""" x = 10 y = 20 return x + y class ClassA: """Class A.""" def method1(self): pass ''', 'module2.py': '''"""Module 2.""" def function_b(): # This is a comment result = 100 return result def function_c(): """Another function.""" for i in range(10): print(i) ''', 'module3.py': '''"""Module 3 with docstrings.""" class MyClass: """A class with methods.""" def __init__(self): """Initialize.""" self.value = 0 def get_value(self): """Get value.""" return self.value def set_value(self, val): """Set value.""" self.value = val ''', 'module4.py': '''# Simple module x = 1 y = 2 z = x + y ''', 'module5.py': '''"""Complex module.""" def complex_function(): """Complex function with multiple statements.""" # Comment 1 a = 1 # Comment 2 b = 2 # Comment 3 c = 3 result = a + b + c return result ''', 'module6.py': '''import json import os data = {"key": "value"} print(json.dumps(data)) ''', 'module7.py': '''"""Module with many comments.""" # Comment line 1 # Comment line 2 # Comment line 3 def func(): """Docstring.""" # Inline comment return True ''', 'module8.py': '''class Empty: pass class AnotherEmpty: pass ''', 'module9.py': '''"""Docstring only module.""" ''', 'module10.py': '''# Many blank lines def func(): pass # More blanks ''' } def run_pygount_on_file(file_path): """Run pygount and extract counts.""" proc = subprocess.run(['pygount', '--format', 'json', str(file_path)], capture_output=True, text=True, check=True) data = json.loads(proc.stdout) if isinstance(data, list): item = data[0] else: item = data['files'][0] return { 'lineCount': item.get('lineCount'), 'sourceCount': item.get('sourceCount'), 'codeCount': item.get('codeCount'), 'documentationCount': item.get('documentationCount'), 'emptyCount': item.get('emptyCount'), } print("=" * 80) print("RIGOROUS PYGOUNT DETERMINISM TEST") print("=" * 80) print("Testing: 10 files, 4 runs each, 2 locations, with time delays") print() # Create temp directory 1 temp_dir1 = Path(tempfile.mkdtemp(prefix="test_loc1_")) print(f"Location 1: {temp_dir1}") # Write files to location 1 for filename, content in test_files_content.items(): (temp_dir1 / filename).write_text(content, encoding='utf-8') # Calculate content hashes file_hashes = {} for filename, content in test_files_content.items(): file_hashes[filename] = hashlib.md5(content.encode()).hexdigest() print(f"Created {len(test_files_content)} test files") print() # Store all results all_results = {} # Run 1: First analysis in location 1 print("RUN 1: Analyzing files in location 1 (first time)") print("-" * 80) time.sleep(2) # Small delay run1_results = {} for filename in test_files_content: file_path = temp_dir1 / filename counts = run_pygount_on_file(file_path) run1_results[filename] = counts print(f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}") all_results['run1'] = run1_results print() # Run 2: Second analysis in SAME location after delay print("RUN 2: Re-analyzing SAME files after 10 second delay") print("-" * 80) time.sleep(10) # Wait 10 seconds run2_results = {} for filename in test_files_content: file_path = temp_dir1 / filename counts = run_pygount_on_file(file_path) run2_results[filename] = counts print(f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}") all_results['run2'] = run2_results print() # Copy files to location 2 temp_dir2 = Path(tempfile.mkdtemp(prefix="test_loc2_")) print(f"Location 2: {temp_dir2}") for filename in test_files_content: shutil.copy2(temp_dir1 / filename, temp_dir2 / filename) print("Files copied to location 2") print() # Run 3: Analysis in NEW location print("RUN 3: Analyzing files in location 2 (different path, same content)") print("-" * 80) time.sleep(5) # Wait 5 seconds run3_results = {} for filename in test_files_content: file_path = temp_dir2 / filename counts = run_pygount_on_file(file_path) run3_results[filename] = counts print(f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}") all_results['run3'] = run3_results print() # Run 4: Final analysis back in location 1 print("RUN 4: Re-analyzing files in location 1 again (after 10 more seconds)") print("-" * 80) time.sleep(10) # Wait 10 more seconds run4_results = {} for filename in test_files_content: file_path = temp_dir1 / filename counts = run_pygount_on_file(file_path) run4_results[filename] = counts print(f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}") all_results['run4'] = run4_results print() # Analyze results print("=" * 80) print("ANALYSIS: Checking for differences across runs") print("=" * 80) differences_found = False for filename in test_files_content: file_hash = file_hashes[filename] # Get results from all runs r1 = all_results['run1'][filename] r2 = all_results['run2'][filename] r3 = all_results['run3'][filename] r4 = all_results['run4'][filename] # Check if all runs produced identical results if r1 == r2 == r3 == r4: continue # All identical, good else: print(f"\n❌ DIFFERENCE FOUND in {filename} (hash: {file_hash[:8]})") print(f" Run 1: code={r1['codeCount']}, doc={r1['documentationCount']}, empty={r1['emptyCount']}") print(f" Run 2: code={r2['codeCount']}, doc={r2['documentationCount']}, empty={r2['emptyCount']}") print(f" Run 3: code={r3['codeCount']}, doc={r3['documentationCount']}, empty={r3['emptyCount']}") print(f" Run 4: code={r4['codeCount']}, doc={r4['documentationCount']}, empty={r4['emptyCount']}") differences_found = True print() if not differences_found: print("✅ SUCCESS: All 10 files produced IDENTICAL results across all 4 runs!") print(" - Same results in same location (run 1 vs run 2)") print(" - Same results in different location (run 2 vs run 3)") print(" - Same results after time delays (all runs)") print() print("CONCLUSION: pygount IS deterministic") print(" → Same file content = same numeric results") print(" → Independent of: file path, time, previous runs") else: print("❌ FAILURE: pygount produced DIFFERENT results for identical content!") print() print("CONCLUSION: pygount is NOT deterministic") print(" → Cannot be reliably used for baseline comparison") print(" → Need alternative counting solution") # Cleanup shutil.rmtree(temp_dir1) shutil.rmtree(temp_dir2)