"""Deep test of pygount determinism - check EXACT output.""" import subprocess import json import hashlib from pathlib import Path import tempfile import time # Create a test file with known content test_content = '''"""Test module.""" def function_one(): """Function one.""" x = 1 y = 2 return x + y class MyClass: """A class.""" def method(self): """Method.""" # Comment line pass if __name__ == "__main__": print("Hello") ''' temp_file = Path(tempfile.mktemp(suffix=".py")) temp_file.write_text(test_content, encoding='utf-8') print(f"Test file: {temp_file}") print(f"Content hash: {hashlib.md5(test_content.encode()).hexdigest()}") print() # Run pygount 10 times and collect EXACT JSON output results = [] for i in range(10): proc = subprocess.run(['pygount', '--format', 'json', str(temp_file)], capture_output=True, text=True, check=True) # Parse JSON data = json.loads(proc.stdout) # Extract the file entry if isinstance(data, list): item = data[0] else: item = data['files'][0] results.append({ 'run': i + 1, 'raw_json': proc.stdout, 'lineCount': item.get('lineCount'), 'sourceCount': item.get('sourceCount'), 'codeCount': item.get('codeCount'), 'documentationCount': item.get('documentationCount'), 'emptyCount': item.get('emptyCount'), 'stringCount': item.get('stringCount'), 'language': item.get('language'), }) print(f"Run {i+1}: code={item.get('codeCount')}, doc={item.get('documentationCount')}, " f"empty={item.get('emptyCount')}, source={item.get('sourceCount')}") # Check if ALL runs produced IDENTICAL results print() print("=" * 70) print("CHECKING FOR DIFFERENCES") print("=" * 70) first = results[0] all_identical = True for i, result in enumerate(results[1:], 2): if result['lineCount'] != first['lineCount'] or \ result['sourceCount'] != first['sourceCount'] or \ result['codeCount'] != first['codeCount'] or \ result['documentationCount'] != first['documentationCount'] or \ result['emptyCount'] != first['emptyCount']: print(f"\nāŒ DIFFERENCE FOUND in run {i}:") print(f" Run 1: lineCount={first['lineCount']}, sourceCount={first['sourceCount']}, " f"codeCount={first['codeCount']}, doc={first['documentationCount']}, empty={first['emptyCount']}") print(f" Run {i}: lineCount={result['lineCount']}, sourceCount={result['sourceCount']}, " f"codeCount={result['codeCount']}, doc={result['documentationCount']}, empty={result['emptyCount']}") all_identical = False # Check if raw JSON is byte-for-byte identical json_hashes = [hashlib.md5(r['raw_json'].encode()).hexdigest() for r in results] unique_hashes = set(json_hashes) print() if len(unique_hashes) == 1: print("āœ… All JSON outputs are BYTE-FOR-BYTE IDENTICAL") print(f" JSON hash: {json_hashes[0]}") else: print(f"āŒ Found {len(unique_hashes)} DIFFERENT JSON outputs:") for h in unique_hashes: count = json_hashes.count(h) print(f" Hash {h[:8]}... appeared {count} times") # Show first difference in detail print("\n First two different outputs:") for i, result in enumerate(results): if json_hashes[i] != json_hashes[0]: print(f"\n Run 1 JSON:\n{results[0]['raw_json']}") print(f"\n Run {i+1} JSON:\n{result['raw_json']}") break print() if all_identical: print("šŸŽ‰ CONCLUSION: pygount IS deterministic (all numeric values identical)") else: print("āš ļø CONCLUSION: pygount is NOT deterministic (values differ between runs)") print(" → This tool cannot be used for reliable baseline comparison") print(" → Recommend switching to a deterministic counting library") # Cleanup temp_file.unlink()