"""Deep test of pygount determinism - check EXACT output.""" import subprocess import json import hashlib from pathlib import Path import tempfile import time # Create a test file with known content test_content = '''"""Test module.""" def function_one(): """Function one.""" x = 1 y = 2 return x + y class MyClass: """A class.""" def method(self): """Method.""" # Comment line pass if __name__ == "__main__": print("Hello") ''' temp_file = Path(tempfile.mktemp(suffix=".py")) temp_file.write_text(test_content, encoding="utf-8") print(f"Test file: {temp_file}") print(f"Content hash: {hashlib.md5(test_content.encode()).hexdigest()}") print() # Run pygount 10 times and collect EXACT JSON output results = [] for i in range(10): proc = subprocess.run( ["pygount", "--format", "json", str(temp_file)], capture_output=True, text=True, check=True, ) # Parse JSON data = json.loads(proc.stdout) # Extract the file entry if isinstance(data, list): item = data[0] else: item = data["files"][0] results.append( { "run": i + 1, "raw_json": proc.stdout, "lineCount": item.get("lineCount"), "sourceCount": item.get("sourceCount"), "codeCount": item.get("codeCount"), "documentationCount": item.get("documentationCount"), "emptyCount": item.get("emptyCount"), "stringCount": item.get("stringCount"), "language": item.get("language"), } ) print( f"Run {i+1}: code={item.get('codeCount')}, doc={item.get('documentationCount')}, " f"empty={item.get('emptyCount')}, source={item.get('sourceCount')}" ) # Check if ALL runs produced IDENTICAL results print() print("=" * 70) print("CHECKING FOR DIFFERENCES") print("=" * 70) first = results[0] all_identical = True for i, result in enumerate(results[1:], 2): if ( result["lineCount"] != first["lineCount"] or result["sourceCount"] != first["sourceCount"] or result["codeCount"] != first["codeCount"] or result["documentationCount"] != first["documentationCount"] or result["emptyCount"] != first["emptyCount"] ): print(f"\nāŒ DIFFERENCE FOUND in run {i}:") print( f" Run 1: lineCount={first['lineCount']}, sourceCount={first['sourceCount']}, " f"codeCount={first['codeCount']}, doc={first['documentationCount']}, empty={first['emptyCount']}" ) print( f" Run {i}: lineCount={result['lineCount']}, sourceCount={result['sourceCount']}, " f"codeCount={result['codeCount']}, doc={result['documentationCount']}, empty={result['emptyCount']}" ) all_identical = False # Check if raw JSON is byte-for-byte identical json_hashes = [hashlib.md5(r["raw_json"].encode()).hexdigest() for r in results] unique_hashes = set(json_hashes) print() if len(unique_hashes) == 1: print("āœ… All JSON outputs are BYTE-FOR-BYTE IDENTICAL") print(f" JSON hash: {json_hashes[0]}") else: print(f"āŒ Found {len(unique_hashes)} DIFFERENT JSON outputs:") for h in unique_hashes: count = json_hashes.count(h) print(f" Hash {h[:8]}... appeared {count} times") # Show first difference in detail print("\n First two different outputs:") for i, result in enumerate(results): if json_hashes[i] != json_hashes[0]: print(f"\n Run 1 JSON:\n{results[0]['raw_json']}") print(f"\n Run {i+1} JSON:\n{result['raw_json']}") break print() if all_identical: print("šŸŽ‰ CONCLUSION: pygount IS deterministic (all numeric values identical)") else: print("āš ļø CONCLUSION: pygount is NOT deterministic (values differ between runs)") print(" → This tool cannot be used for reliable baseline comparison") print(" → Recommend switching to a deterministic counting library") # Cleanup temp_file.unlink()