SXXXXXXX_PyUCC/tests/test_pygount_rigorous.py

285 lines
7.3 KiB
Python

"""Rigorous test: multiple files, multiple runs, different locations, with time delays."""
import subprocess
import json
import hashlib
from pathlib import Path
import tempfile
import time
import shutil
# Fixture sources for the determinism check, keyed by file name.  Each value
# is the exact text that is later written to disk and counted by pygount.
# The ten entries vary the mix of code lines, "#" comments, docstrings and
# imports so that every count category is exercised.
# NOTE(review): as written here the embedded sources carry no indentation and
# no blank lines (even "module10.py", whose comments mention blank lines) --
# confirm this matches the intended fixture content.
test_files_content = {
"module1.py": '''"""Module 1."""
import os
import sys
def function_a():
"""Function A."""
x = 10
y = 20
return x + y
class ClassA:
"""Class A."""
def method1(self):
pass
''',
"module2.py": '''"""Module 2."""
def function_b():
# This is a comment
result = 100
return result
def function_c():
"""Another function."""
for i in range(10):
print(i)
''',
"module3.py": '''"""Module 3 with docstrings."""
class MyClass:
"""A class with methods."""
def __init__(self):
"""Initialize."""
self.value = 0
def get_value(self):
"""Get value."""
return self.value
def set_value(self, val):
"""Set value."""
self.value = val
''',
"module4.py": """# Simple module
x = 1
y = 2
z = x + y
""",
"module5.py": '''"""Complex module."""
def complex_function():
"""Complex function with multiple statements."""
# Comment 1
a = 1
# Comment 2
b = 2
# Comment 3
c = 3
result = a + b + c
return result
''',
"module6.py": """import json
import os
data = {"key": "value"}
print(json.dumps(data))
""",
"module7.py": '''"""Module with many comments."""
# Comment line 1
# Comment line 2
# Comment line 3
def func():
"""Docstring."""
# Inline comment
return True
''',
"module8.py": """class Empty:
pass
class AnotherEmpty:
pass
""",
"module9.py": '''"""Docstring only module."""
''',
"module10.py": """# Many blank lines
def func():
pass
# More blanks
""",
}
def run_pygount_on_file(file_path):
    """Run ``pygount`` on one file and return its line counts.

    Invokes ``pygount --format json <file_path>`` as a subprocess and reads
    the first file entry from the JSON it prints.  Two output layouts are
    accepted: a top-level list of file entries, or an object that holds the
    entries under the ``"files"`` key.

    Args:
        file_path: Path of the file to analyse; it is passed through ``str()``.

    Returns:
        A dict with the keys ``lineCount``, ``sourceCount``, ``codeCount``,
        ``documentationCount`` and ``emptyCount``.  A key that the file entry
        does not provide maps to ``None``.

    Raises:
        subprocess.CalledProcessError: If ``pygount`` exits with a non-zero
            status (``check=True``).
        ValueError: If the output is not valid JSON or does not contain a
            file entry.
    """
    proc = subprocess.run(
        ["pygount", "--format", "json", str(file_path)],
        capture_output=True,
        text=True,
        check=True,
    )
    data = json.loads(proc.stdout)

    if isinstance(data, list):
        entries = data
    elif isinstance(data, dict):
        entries = data.get("files")
    else:
        entries = None

    # Fail with a message naming the file instead of an opaque
    # IndexError/KeyError/TypeError when pygount reported nothing usable.
    if not isinstance(entries, list) or not entries or not isinstance(entries[0], dict):
        raise ValueError(f"pygount reported no file entry for {file_path}")

    item = entries[0]
    count_keys = (
        "lineCount",
        "sourceCount",
        "codeCount",
        "documentationCount",
        "emptyCount",
    )
    return {key: item.get(key) for key in count_keys}
# Banner describing what this script is about to do.
print("=" * 80, "RIGOROUS PYGOUNT DETERMINISM TEST", "=" * 80, sep="\n")
print("Testing: 10 files, 4 runs each, 2 locations, with time delays")
print()

# First temporary location: every fixture is written here as UTF-8 text.
temp_dir1 = Path(tempfile.mkdtemp(prefix="test_loc1_"))
print(f"Location 1: {temp_dir1}")
for filename, content in test_files_content.items():
    target_path = temp_dir1 / filename
    target_path.write_text(content, encoding="utf-8")

# MD5 of each fixture's text; used later only to label a file in the report.
file_hashes = {
    name: hashlib.md5(text.encode()).hexdigest()
    for name, text in test_files_content.items()
}
print(f"Created {len(test_files_content)} test files")
print()
# Store all results, keyed "run1" .. "run4"; each value maps file name -> counts.
all_results = {}


def _analyze_location(directory):
    """Count every test file under *directory* with pygount.

    Prints one summary line per file and returns a dict mapping each file
    name to the counts returned by ``run_pygount_on_file``.
    """
    results = {}
    for filename in test_files_content:
        counts = run_pygount_on_file(directory / filename)
        results[filename] = counts
        print(
            f" {filename}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}"
        )
    return results


# Bound before the try block so the error path can tell whether the second
# location has been created yet.
temp_dir2 = None
try:
    # Run 1: First analysis in location 1
    print("RUN 1: Analyzing files in location 1 (first time)")
    print("-" * 80)
    time.sleep(2)  # Small delay
    run1_results = _analyze_location(temp_dir1)
    all_results["run1"] = run1_results
    print()

    # Run 2: Second analysis in SAME location after delay
    print("RUN 2: Re-analyzing SAME files after 10 second delay")
    print("-" * 80)
    time.sleep(10)  # Wait 10 seconds
    run2_results = _analyze_location(temp_dir1)
    all_results["run2"] = run2_results
    print()

    # Copy files to location 2
    temp_dir2 = Path(tempfile.mkdtemp(prefix="test_loc2_"))
    print(f"Location 2: {temp_dir2}")
    for filename in test_files_content:
        shutil.copy2(temp_dir1 / filename, temp_dir2 / filename)
    print("Files copied to location 2")
    print()

    # Run 3: Analysis in NEW location
    print("RUN 3: Analyzing files in location 2 (different path, same content)")
    print("-" * 80)
    time.sleep(5)  # Wait 5 seconds
    run3_results = _analyze_location(temp_dir2)
    all_results["run3"] = run3_results
    print()

    # Run 4: Final analysis back in location 1
    print("RUN 4: Re-analyzing files in location 1 again (after 10 more seconds)")
    print("-" * 80)
    time.sleep(10)  # Wait 10 more seconds
    run4_results = _analyze_location(temp_dir1)
    all_results["run4"] = run4_results
    print()
except BaseException:
    # A failing pygount call would otherwise abort the script before the
    # cleanup at the end of the file, leaving the temporary directories
    # behind.  Remove them here, then let the original error propagate.
    shutil.rmtree(temp_dir1, ignore_errors=True)
    if temp_dir2 is not None:
        shutil.rmtree(temp_dir2, ignore_errors=True)
    raise
# Analyze results: a file passes when every run reported the same counts.
print("=" * 80)
print("ANALYSIS: Checking for differences across runs")
print("=" * 80)
differences_found = False
for filename in test_files_content:
    # Counts for this file from every recorded run, in the order the runs
    # were stored in all_results.
    per_run = [run_results[filename] for run_results in all_results.values()]
    if all(counts == per_run[0] for counts in per_run[1:]):
        continue  # All identical, good

    differences_found = True
    file_hash = file_hashes[filename]
    print(f"\n❌ DIFFERENCE FOUND in {filename} (hash: {file_hash[:8]})")
    for run_number, counts in enumerate(per_run, start=1):
        print(
            f" Run {run_number}: code={counts['codeCount']}, doc={counts['documentationCount']}, empty={counts['emptyCount']}"
        )
print()

if not differences_found:
    # File and run totals are taken from the data so the message cannot
    # drift from what was actually analysed.
    print(
        f"✅ SUCCESS: All {len(test_files_content)} files produced IDENTICAL results"
        f" across all {len(all_results)} runs!"
    )
    print(" - Same results in same location (run 1 vs run 2)")
    print(" - Same results in different location (run 2 vs run 3)")
    print(" - Same results after time delays (all runs)")
    print()
    print("CONCLUSION: pygount IS deterministic")
    print(" → Same file content = same numeric results")
    print(" → Independent of: file path, time, previous runs")
else:
    print("❌ FAILURE: pygount produced DIFFERENT results for identical content!")
    print()
    print("CONCLUSION: pygount is NOT deterministic")
    print(" → Cannot be reliably used for baseline comparison")
    print(" → Need alternative counting solution")
# Cleanup: delete both temporary locations together with the files in them.
for temp_location in (temp_dir1, temp_dir2):
    shutil.rmtree(temp_location)