SXXXXXXX_PyUCC/tests/test_pygount_rigorous.py

262 lines
7.2 KiB
Python

"""Rigorous test: multiple files, multiple runs, different locations, with time delays."""
import subprocess
import json
import hashlib
from pathlib import Path
import tempfile
import time
import shutil
# Fixture data for the determinism check: maps a file name to the Python
# source text that is written to disk under that name and then analysed.
# The ten snippets vary in their mix of code, "#" comments and docstrings.
test_files_content = {
'module1.py': '''"""Module 1."""
import os
import sys
def function_a():
"""Function A."""
x = 10
y = 20
return x + y
class ClassA:
"""Class A."""
def method1(self):
pass
''',
'module2.py': '''"""Module 2."""
def function_b():
# This is a comment
result = 100
return result
def function_c():
"""Another function."""
for i in range(10):
print(i)
''',
'module3.py': '''"""Module 3 with docstrings."""
class MyClass:
"""A class with methods."""
def __init__(self):
"""Initialize."""
self.value = 0
def get_value(self):
"""Get value."""
return self.value
def set_value(self, val):
"""Set value."""
self.value = val
''',
'module4.py': '''# Simple module
x = 1
y = 2
z = x + y
''',
'module5.py': '''"""Complex module."""
def complex_function():
"""Complex function with multiple statements."""
# Comment 1
a = 1
# Comment 2
b = 2
# Comment 3
c = 3
result = a + b + c
return result
''',
'module6.py': '''import json
import os
data = {"key": "value"}
print(json.dumps(data))
''',
'module7.py': '''"""Module with many comments."""
# Comment line 1
# Comment line 2
# Comment line 3
def func():
"""Docstring."""
# Inline comment
return True
''',
'module8.py': '''class Empty:
pass
class AnotherEmpty:
pass
''',
'module9.py': '''"""Docstring only module."""
''',
'module10.py': '''# Many blank lines
def func():
pass
# More blanks
'''
}
def run_pygount_on_file(file_path):
    """Count the lines of one file with the ``pygount`` command-line tool.

    Invokes ``pygount --format json <file_path>`` and parses its standard
    output as JSON. The first file entry is taken either from a top-level
    list or from the ``files`` list of a top-level object.

    Args:
        file_path: Path of the file to analyse; converted with ``str()``.

    Returns:
        A dict with the keys ``lineCount``, ``sourceCount``, ``codeCount``,
        ``documentationCount`` and ``emptyCount``. A key that the entry does
        not provide maps to ``None``.

    Raises:
        subprocess.CalledProcessError: If pygount exits with a non-zero
            status (the call uses ``check=True``).
    """
    command = ['pygount', '--format', 'json', str(file_path)]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    report = json.loads(completed.stdout)
    # Accept both report shapes: a bare list of entries or {"files": [...]}.
    entry = report[0] if isinstance(report, list) else report['files'][0]
    wanted = ('lineCount', 'sourceCount', 'codeCount', 'documentationCount', 'emptyCount')
    return {key: entry.get(key) for key in wanted}
def _analyze_directory(directory):
    """Run pygount on every test file in *directory* and report the counts.

    Prints one summary line per file and returns a dict mapping each file
    name to the counts dict returned by ``run_pygount_on_file``.
    """
    results = {}
    for filename in test_files_content:
        counts = run_pygount_on_file(directory / filename)
        results[filename] = counts
        print(f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}")
    return results


print("=" * 80)
print("RIGOROUS PYGOUNT DETERMINISM TEST")
print("=" * 80)
print("Testing: 10 files, 4 runs each, 2 locations, with time delays")
print()

# Create temp directory 1
temp_dir1 = Path(tempfile.mkdtemp(prefix="test_loc1_"))
# Location 2 is only created part-way through the script; the placeholder
# lets the cleanup in ``finally`` know whether it exists yet.
temp_dir2 = None
try:
    print(f"Location 1: {temp_dir1}")

    # Write files to location 1
    for filename, content in test_files_content.items():
        (temp_dir1 / filename).write_text(content, encoding='utf-8')

    # Content hashes are used only to label a file in the difference report.
    file_hashes = {
        filename: hashlib.md5(content.encode()).hexdigest()
        for filename, content in test_files_content.items()
    }
    print(f"Created {len(test_files_content)} test files")
    print()

    # Store all results, keyed by run name
    all_results = {}

    # Run 1: First analysis in location 1
    print("RUN 1: Analyzing files in location 1 (first time)")
    print("-" * 80)
    time.sleep(2)  # Small delay
    run1_results = _analyze_directory(temp_dir1)
    all_results['run1'] = run1_results
    print()

    # Run 2: Second analysis in SAME location after delay
    print("RUN 2: Re-analyzing SAME files after 10 second delay")
    print("-" * 80)
    time.sleep(10)  # Wait 10 seconds
    run2_results = _analyze_directory(temp_dir1)
    all_results['run2'] = run2_results
    print()

    # Copy files to location 2
    temp_dir2 = Path(tempfile.mkdtemp(prefix="test_loc2_"))
    print(f"Location 2: {temp_dir2}")
    for filename in test_files_content:
        shutil.copy2(temp_dir1 / filename, temp_dir2 / filename)
    print("Files copied to location 2")
    print()

    # Run 3: Analysis in NEW location
    print("RUN 3: Analyzing files in location 2 (different path, same content)")
    print("-" * 80)
    time.sleep(5)  # Wait 5 seconds
    run3_results = _analyze_directory(temp_dir2)
    all_results['run3'] = run3_results
    print()

    # Run 4: Final analysis back in location 1
    print("RUN 4: Re-analyzing files in location 1 again (after 10 more seconds)")
    print("-" * 80)
    time.sleep(10)  # Wait 10 more seconds
    run4_results = _analyze_directory(temp_dir1)
    all_results['run4'] = run4_results
    print()

    # Analyze results
    print("=" * 80)
    print("ANALYSIS: Checking for differences across runs")
    print("=" * 80)
    differences_found = False
    for filename in test_files_content:
        per_run = [all_results[run][filename] for run in ('run1', 'run2', 'run3', 'run4')]
        # All four runs must agree with the first one for the file to pass.
        if all(counts == per_run[0] for counts in per_run[1:]):
            continue
        file_hash = file_hashes[filename]
        print(f"\n❌ DIFFERENCE FOUND in {filename} (hash: {file_hash[:8]})")
        for run_number, counts in enumerate(per_run, start=1):
            print(f" Run {run_number}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}")
        differences_found = True
    print()

    # NOTE(review): the verdict is reported on stdout only; the process exit
    # status is not changed when differences are found.
    if not differences_found:
        print("✅ SUCCESS: All 10 files produced IDENTICAL results across all 4 runs!")
        print(" - Same results in same location (run 1 vs run 2)")
        print(" - Same results in different location (run 2 vs run 3)")
        print(" - Same results after time delays (all runs)")
        print()
        print("CONCLUSION: pygount IS deterministic")
        print(" → Same file content = same numeric results")
        print(" → Independent of: file path, time, previous runs")
    else:
        print("❌ FAILURE: pygount produced DIFFERENT results for identical content!")
        print()
        print("CONCLUSION: pygount is NOT deterministic")
        print(" → Cannot be reliably used for baseline comparison")
        print(" → Need alternative counting solution")
finally:
    # Cleanup: runs even when pygount is missing or exits with an error, so
    # the temporary directories are not left behind on failure.
    for temp_dir in (temp_dir1, temp_dir2):
        if temp_dir is not None:
            shutil.rmtree(temp_dir)