SXXXXXXX_PyUCC/tests/test_pygount_rigorous.py

"""Rigorous test: multiple files, multiple runs, different locations, with time delays."""
import subprocess
import json
import hashlib
from pathlib import Path
import tempfile
import time
import shutil

# In-memory fixtures: maps a file name to the Python source text stored under
# that name. The entries deliberately mix code, docstrings, `#` comments and
# blank lines in different proportions; further down they are written to a
# temporary directory and each file is passed to pygount.
test_files_content = {
    # Imports, one function and one class, each carrying a docstring.
    'module1.py': '''"""Module 1."""
import os
import sys

def function_a():
    """Function A."""
    x = 10
    y = 20
    return x + y

class ClassA:
    """Class A."""
    def method1(self):
        pass
''',
    # Two functions: one with a `#` comment, one with a docstring and a loop.
    'module2.py': '''"""Module 2."""

def function_b():
    # This is a comment
    result = 100
    return result

def function_c():
    """Another function."""
    for i in range(10):
        print(i)
''',
    # A class whose every method has a docstring.
    'module3.py': '''"""Module 3 with docstrings."""

class MyClass:
    """A class with methods."""

    def __init__(self):
        """Initialize."""
        self.value = 0

    def get_value(self):
        """Get value."""
        return self.value

    def set_value(self, val):
        """Set value."""
        self.value = val
''',
    # One comment line followed by three plain assignments.
    'module4.py': '''# Simple module
x = 1
y = 2
z = x + y
''',
    # A single function with `#` comments interleaved between statements.
    'module5.py': '''"""Complex module."""

def complex_function():
    """Complex function with multiple statements."""
    # Comment 1
    a = 1
    # Comment 2
    b = 2
    # Comment 3
    c = 3

    result = a + b + c
    return result
''',
    # Imports and top-level statements only; no docstrings or comments.
    'module6.py': '''import json
import os

data = {"key": "value"}
print(json.dumps(data))
''',
    # Module docstring, a run of comment lines, then a small function.
    'module7.py': '''"""Module with many comments."""
# Comment line 1
# Comment line 2
# Comment line 3

def func():
    """Docstring."""
    # Inline comment
    return True
''',
    # Two classes whose bodies are just `pass`.
    'module8.py': '''class Empty:
    pass

class AnotherEmpty:
    pass
''',
    # Nothing but a module docstring.
    'module9.py': '''"""Docstring only module."""
''',
    # Mostly blank lines around two comments and one trivial function.
    'module10.py': '''# Many blank lines


def func():
    pass


# More blanks


'''
}

def run_pygount_on_file(file_path):
    """Invoke the ``pygount`` command on one file and return its counters.

    The command is run with ``--format json`` and its standard output is
    parsed as JSON. The parsed report may be either a bare list of per-file
    entries or an object holding that list under ``"files"``; in both cases
    only the first entry is used.

    Args:
        file_path: Path of the file to analyze (converted with ``str``).

    Returns:
        A dict with the keys ``lineCount``, ``sourceCount``, ``codeCount``,
        ``documentationCount`` and ``emptyCount``. A counter missing from
        the report entry is returned as ``None``.

    Raises:
        subprocess.CalledProcessError: If ``pygount`` exits with a non-zero
            status (the call uses ``check=True``).
    """
    command = ['pygount', '--format', 'json', str(file_path)]
    completed = subprocess.run(command, capture_output=True, text=True, check=True)
    report = json.loads(completed.stdout)

    # Accept both report layouts and pick the entry for the analyzed file.
    entries = report if isinstance(report, list) else report['files']
    first_entry = entries[0]

    counter_keys = (
        'lineCount',
        'sourceCount',
        'codeCount',
        'documentationCount',
        'emptyCount',
    )
    return {key: first_entry.get(key) for key in counter_keys}

def _format_counts(counts):
    """Render the three displayed counters of one pygount result."""
    return (
        f"code={counts['codeCount']}, "
        f"doc={counts['documentationCount']}, "
        f"empty={counts['emptyCount']}"
    )


def _analyze_location(heading, delay_seconds, directory):
    """Print *heading*, wait, then run pygount on every fixture in *directory*.

    Args:
        heading: Line announcing the run; printed before the delay.
        delay_seconds: Seconds to sleep before the analysis starts.
        directory: ``Path`` of the directory holding the fixture files.

    Returns:
        A dict mapping each fixture file name to the counters returned by
        ``run_pygount_on_file``.
    """
    print(heading)
    print("-" * 80)
    time.sleep(delay_seconds)
    results = {}
    for filename in test_files_content:
        counts = run_pygount_on_file(directory / filename)
        results[filename] = counts
        print(f"  {filename}: {_format_counts(counts)}")
    print()
    return results


# Derive the file count from the fixtures so the messages stay correct when
# fixtures are added or removed.
file_count = len(test_files_content)

print("=" * 80)
print("RIGOROUS PYGOUNT DETERMINISM TEST")
print("=" * 80)
print(f"Testing: {file_count} files, 4 runs each, 2 locations, with time delays")
print()

# Hash of each fixture's in-memory text; only used to label a differing file
# in the report below.
file_hashes = {
    filename: hashlib.md5(content.encode()).hexdigest()
    for filename, content in test_files_content.items()
}

# Results of every run, keyed 'run1' .. 'run4', then by file name.
all_results = {}
run_names = ('run1', 'run2', 'run3', 'run4')

temp_dir1 = Path(tempfile.mkdtemp(prefix="test_loc1_"))
temp_dir2 = None
try:
    print(f"Location 1: {temp_dir1}")

    # Write files to location 1
    for filename, content in test_files_content.items():
        (temp_dir1 / filename).write_text(content, encoding='utf-8')

    print(f"Created {file_count} test files")
    print()

    # Run 1: first analysis in location 1
    all_results['run1'] = _analyze_location(
        "RUN 1: Analyzing files in location 1 (first time)", 2, temp_dir1)

    # Run 2: second analysis in the SAME location after a delay
    all_results['run2'] = _analyze_location(
        "RUN 2: Re-analyzing SAME files after 10 second delay", 10, temp_dir1)

    # Copy files to location 2
    temp_dir2 = Path(tempfile.mkdtemp(prefix="test_loc2_"))
    print(f"Location 2: {temp_dir2}")
    for filename in test_files_content:
        shutil.copy2(temp_dir1 / filename, temp_dir2 / filename)
    print("Files copied to location 2")
    print()

    # Run 3: analysis in the NEW location
    all_results['run3'] = _analyze_location(
        "RUN 3: Analyzing files in location 2 (different path, same content)",
        5, temp_dir2)

    # Run 4: final analysis back in location 1
    all_results['run4'] = _analyze_location(
        "RUN 4: Re-analyzing files in location 1 again (after 10 more seconds)",
        10, temp_dir1)
finally:
    # Always remove the temporary directories, even when a pygount call
    # fails part-way through. Cleanup errors are ignored so they cannot
    # mask the exception that actually aborted the runs.
    shutil.rmtree(temp_dir1, ignore_errors=True)
    if temp_dir2 is not None:
        shutil.rmtree(temp_dir2, ignore_errors=True)

# Analyze results
print("=" * 80)
print("ANALYSIS: Checking for differences across runs")
print("=" * 80)

differences_found = False
for filename in test_files_content:
    per_run = [all_results[run_name][filename] for run_name in run_names]

    # The comparison covers every counter returned by run_pygount_on_file,
    # although only three of them are shown in the report.
    if all(counts == per_run[0] for counts in per_run[1:]):
        continue  # All identical, good

    print(f"\n❌ DIFFERENCE FOUND in {filename} (hash: {file_hashes[filename][:8]})")
    for run_number, counts in enumerate(per_run, start=1):
        print(f"   Run {run_number}: {_format_counts(counts)}")
    differences_found = True

print()
if not differences_found:
    print(f"✅ SUCCESS: All {file_count} files produced IDENTICAL results across all 4 runs!")
    print("   - Same results in same location (run 1 vs run 2)")
    print("   - Same results in different location (run 2 vs run 3)")
    print("   - Same results after time delays (all runs)")
    print()
    print("CONCLUSION: pygount IS deterministic")
    print("  → Same file content = same numeric results")
    print("  → Independent of: file path, time, previous runs")
else:
    print("❌ FAILURE: pygount produced DIFFERENT results for identical content!")
    print()
    print("CONCLUSION: pygount is NOT deterministic")
    print("  → Cannot be reliably used for baseline comparison")
    print("  → Need alternative counting solution")
    # Report the failure through the exit status so an automated runner
    # notices it instead of seeing a successful exit.
    raise SystemExit(1)