285 lines
7.3 KiB
Python
285 lines
7.3 KiB
Python
"""Rigorous test: multiple files, multiple runs, different locations, with time delays."""
|
|
|
|
import subprocess
|
|
import json
|
|
import hashlib
|
|
from pathlib import Path
|
|
import tempfile
|
|
import time
|
|
import shutil
|
|
|
|
# Create 10 test files with varied content.
#
# Maps file name -> Python source text that is written to disk and analyzed.
# The sources deliberately mix code, docstrings, comments and blank lines so
# that each of pygount's counters is exercised. Every source is kept
# flush-left inside its literal so the file on disk is exactly the text shown
# here, and each one must remain valid Python.
test_files_content = {
    "module1.py": '''"""Module 1."""
import os
import sys

def function_a():
    """Function A."""
    x = 10
    y = 20
    return x + y

class ClassA:
    """Class A."""
    def method1(self):
        pass
''',
    "module2.py": '''"""Module 2."""

def function_b():
    # This is a comment
    result = 100
    return result

def function_c():
    """Another function."""
    for i in range(10):
        print(i)
''',
    "module3.py": '''"""Module 3 with docstrings."""

class MyClass:
    """A class with methods."""

    def __init__(self):
        """Initialize."""
        self.value = 0

    def get_value(self):
        """Get value."""
        return self.value

    def set_value(self, val):
        """Set value."""
        self.value = val
''',
    "module4.py": """# Simple module
x = 1
y = 2
z = x + y
""",
    "module5.py": '''"""Complex module."""

def complex_function():
    """Complex function with multiple statements."""
    # Comment 1
    a = 1
    # Comment 2
    b = 2
    # Comment 3
    c = 3

    result = a + b + c
    return result
''',
    "module6.py": """import json
import os

data = {"key": "value"}
print(json.dumps(data))
""",
    "module7.py": '''"""Module with many comments."""
# Comment line 1
# Comment line 2
# Comment line 3

def func():
    """Docstring."""
    # Inline comment
    return True
''',
    "module8.py": """class Empty:
    pass

class AnotherEmpty:
    pass
""",
    "module9.py": '''"""Docstring only module."""
''',
    "module10.py": """# Many blank lines


def func():
    pass


# More blanks


""",
}


def run_pygount_on_file(file_path, command=("pygount",)):
    """Run pygount on a single file and return its line counts.

    Args:
        file_path: Path of the file to analyze; it is passed to the command
            as ``str(file_path)``.
        command: Argument vector prefix used to launch pygount. Defaults to
            the ``pygount`` executable found on ``PATH``; override it to use
            an absolute path or a wrapper command. ``--format json`` and the
            file path are appended to it.

    Returns:
        A dict with the keys ``lineCount``, ``sourceCount``, ``codeCount``,
        ``documentationCount`` and ``emptyCount``, taken from the first file
        entry of the JSON report. A value is ``None`` when the report does
        not contain that key.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        json.JSONDecodeError: If the command's stdout is not valid JSON.
        ValueError: If the report contains no file entry at all.
    """
    count_keys = (
        "lineCount",
        "sourceCount",
        "codeCount",
        "documentationCount",
        "emptyCount",
    )

    proc = subprocess.run(
        [*command, "--format", "json", str(file_path)],
        capture_output=True,
        text=True,
        check=True,
    )
    data = json.loads(proc.stdout)

    # Two report shapes are accepted: a bare list of file entries, or an
    # object that carries the entries under "files".
    entries = data if isinstance(data, list) else data.get("files")
    if not entries:
        # Fail with the file name instead of an anonymous IndexError/KeyError.
        raise ValueError(f"pygount reported no file entries for {file_path}")

    item = entries[0]
    return {key: item.get(key) for key in count_keys}


def _format_counts(counts):
    """Render the code/doc/empty counters of one result as a short string."""
    return (
        f"code={counts['codeCount']}, "
        f"doc={counts['documentationCount']}, "
        f"empty={counts['emptyCount']}"
    )


def _run_pass(heading, delay_seconds, directory):
    """Announce a run, wait, then analyze every test file in *directory*.

    Returns a dict mapping file name -> counts as returned by
    ``run_pygount_on_file``.
    """
    print(heading)
    print("-" * 80)
    time.sleep(delay_seconds)  # Spread the runs out in time
    results = {}
    for filename in test_files_content:
        counts = run_pygount_on_file(directory / filename)
        results[filename] = counts
        print(f" {filename}: {_format_counts(counts)}")
    print()
    return results


def main():
    """Check that pygount reports identical counts for identical content.

    The test files are written to one temporary directory, analyzed twice
    there, copied to a second temporary directory and analyzed there, and
    finally analyzed once more in the first directory, with sleeps before
    each run. All four results per file are then compared.

    Returns:
        0 when every file produced identical counts in all four runs,
        1 otherwise.
    """
    print("=" * 80)
    print("RIGOROUS PYGOUNT DETERMINISM TEST")
    print("=" * 80)
    print("Testing: 10 files, 4 runs each, 2 locations, with time delays")
    print()

    # Create temp directory 1; directory 2 is only created before run 3.
    temp_dir1 = Path(tempfile.mkdtemp(prefix="test_loc1_"))
    temp_dir2 = None
    try:
        print(f"Location 1: {temp_dir1}")

        # Write files to location 1
        for filename, content in test_files_content.items():
            (temp_dir1 / filename).write_text(content, encoding="utf-8")

        # Content fingerprints, only used to label a file in the report.
        file_hashes = {
            filename: hashlib.md5(content.encode()).hexdigest()
            for filename, content in test_files_content.items()
        }

        print(f"Created {len(test_files_content)} test files")
        print()

        # Store all results, keyed by run name
        all_results = {}

        # Run 1: First analysis in location 1
        all_results["run1"] = _run_pass(
            "RUN 1: Analyzing files in location 1 (first time)", 2, temp_dir1
        )

        # Run 2: Second analysis in SAME location after delay
        all_results["run2"] = _run_pass(
            "RUN 2: Re-analyzing SAME files after 10 second delay", 10, temp_dir1
        )

        # Copy files to location 2
        temp_dir2 = Path(tempfile.mkdtemp(prefix="test_loc2_"))
        print(f"Location 2: {temp_dir2}")
        for filename in test_files_content:
            shutil.copy2(temp_dir1 / filename, temp_dir2 / filename)
        print("Files copied to location 2")
        print()

        # Run 3: Analysis in NEW location
        all_results["run3"] = _run_pass(
            "RUN 3: Analyzing files in location 2 (different path, same content)",
            5,
            temp_dir2,
        )

        # Run 4: Final analysis back in location 1
        all_results["run4"] = _run_pass(
            "RUN 4: Re-analyzing files in location 1 again (after 10 more seconds)",
            10,
            temp_dir1,
        )

        # Analyze results
        print("=" * 80)
        print("ANALYSIS: Checking for differences across runs")
        print("=" * 80)

        run_names = ("run1", "run2", "run3", "run4")
        differences_found = False
        for filename in test_files_content:
            per_run = [all_results[run_name][filename] for run_name in run_names]
            if all(counts == per_run[0] for counts in per_run[1:]):
                continue  # All identical, good

            file_hash = file_hashes[filename]
            print(f"\n❌ DIFFERENCE FOUND in {filename} (hash: {file_hash[:8]})")
            for run_number, counts in enumerate(per_run, start=1):
                print(f" Run {run_number}: {_format_counts(counts)}")
            differences_found = True

        print()
        if not differences_found:
            print("✅ SUCCESS: All 10 files produced IDENTICAL results across all 4 runs!")
            print(" - Same results in same location (run 1 vs run 2)")
            print(" - Same results in different location (run 2 vs run 3)")
            print(" - Same results after time delays (all runs)")
            print()
            print("CONCLUSION: pygount IS deterministic")
            print(" → Same file content = same numeric results")
            print(" → Independent of: file path, time, previous runs")
        else:
            print("❌ FAILURE: pygount produced DIFFERENT results for identical content!")
            print()
            print("CONCLUSION: pygount is NOT deterministic")
            print(" → Cannot be reliably used for baseline comparison")
            print(" → Need alternative counting solution")
    finally:
        # Cleanup runs even when a pygount call fails, so no temp dirs leak;
        # errors are ignored here so they cannot mask the original exception.
        shutil.rmtree(temp_dir1, ignore_errors=True)
        if temp_dir2 is not None:
            shutil.rmtree(temp_dir2, ignore_errors=True)

    return 1 if differences_found else 0


if __name__ == "__main__":
    # Exit non-zero on detected differences so callers and CI can react.
    raise SystemExit(main())