SXXXXXXX_PyUCC/tests/test_ucc_python_counter.py

"""
Test UCC Python counter accuracy on real Python files.

Compares UCCPythonCounter results against actual UCC output.
"""

import subprocess
from pathlib import Path
import pytest
import sys

# Add the directory above this file's directory to sys.path so `pyucc` can be imported
sys.path.insert(0, str(Path(__file__).parent.parent))

from pyucc.core.ucc_python_counter import UCCPythonCounter


def run_ucc_on_file(file_path: Path) -> dict:
    """
    Run the reference UCC executable and parse its metrics for one file.

    UCC is invoked with ``-dir`` set to ``file_path.parent`` and ``-outdir``
    set to a fixed output directory. The generated ``outfile_code.csv`` is
    then scanned for a row mentioning ``file_path.name``.

    Args:
        file_path: Source file whose UCC metrics are wanted.

    Returns:
        Dict with integer values under the keys ``total``, ``blank``,
        ``comment_whole``, ``comment_embedded``, ``compiler_directives``,
        ``data_declarations``, ``exec_instructions``, ``logical_sloc`` and
        ``physical_sloc``. An empty dict is returned when no row for the
        file could be parsed.

    The calling test is skipped (via ``pytest.skip``) when the UCC
    executable is missing, cannot be run, or produces no output CSV.
    """
    ucc_exe = Path("C:/__temp/UCC/UCC.exe")
    if not ucc_exe.exists():
        pytest.skip(f"UCC executable not found at {ucc_exe}")

    # Run UCC
    output_dir = Path("C:/__temp/UCC_test_output")
    output_dir.mkdir(parents=True, exist_ok=True)

    cmd = [str(ucc_exe), "-dir", str(file_path.parent), "-outdir", str(output_dir)]
    try:
        # Only the generated CSV is consumed; the captured output is discarded.
        subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except Exception as e:
        # Best-effort boundary: any failure to launch UCC turns into a skip.
        pytest.skip(f"Failed to run UCC: {e}")

    # Parse output CSV
    csv_file = output_dir / "outfile_code.csv"
    if not csv_file.exists():
        pytest.skip("UCC did not generate output CSV")

    # Metric names for CSV columns 1..9, in column order.
    # NOTE(review): this layout is what the parser assumes (column 1 = total
    # lines ... column 9 = physical SLOC) -- confirm against real UCC output.
    column_names = (
        "total",
        "blank",
        "comment_whole",  # UCC "Whole Comments"
        "comment_embedded",  # UCC "Embedded Comments"
        "compiler_directives",
        "data_declarations",
        "exec_instructions",
        "logical_sloc",
        "physical_sloc",
    )

    with open(csv_file, "r") as f:
        for line in f:
            # NOTE: substring match, so a longer file name ending in this
            # name would match as well.
            if file_path.name not in line:
                continue

            parts = line.split(",")
            if len(parts) < 15:
                # Too short to be a data row; keep looking instead of giving up.
                continue

            try:
                return {
                    name: int(parts[index])
                    for index, name in enumerate(column_names, start=1)
                }
            except ValueError:
                # Non-numeric cells: not the row we want, try later lines.
                continue

    return {}


def test_python_counter_on_real_file():
    """
    Test the Python counter on a real Python file from the project.

    The counter's own results are sanity-checked first. If UCC results can
    be obtained, each metric is then compared against them and the average
    relative error must stay below 20%.
    """
    # Use our own countings_impl.py as test file
    test_file = Path(__file__).parent.parent / "pyucc" / "core" / "countings_impl.py"

    if not test_file.exists():
        pytest.skip(f"Test file not found: {test_file}")

    # Run our counter
    counter = UCCPythonCounter()
    our_results = counter.analyze_file(test_file)

    print(f"\n\n=== Testing: {test_file.name} ===")
    print(f"Our results: {our_results}")

    # Basic sanity checks on our results. These run before the UCC comparison
    # because run_ucc_on_file() may call pytest.skip(), which aborts the test.
    assert our_results["blank_lines"] > 0, "Should have some blank lines"
    assert our_results["physical_sloc"] > 0, "Should have physical SLOC"
    assert our_results["logical_sloc"] > 0, "Should have logical SLOC"
    assert our_results["comment_whole"] >= 0, "Comment whole should be >= 0"
    assert our_results["data_declarations"] == 0, "Python has no data declarations"

    # (UCC key, our key) pairs; only the blank-line metric is named differently.
    metrics_to_compare = [
        ("blank", "blank_lines"),
        ("comment_whole", "comment_whole"),
        ("comment_embedded", "comment_embedded"),
        ("compiler_directives", "compiler_directives"),
        ("exec_instructions", "exec_instructions"),
        ("logical_sloc", "logical_sloc"),
        ("physical_sloc", "physical_sloc"),
    ]

    # Stays None when no comparison could be made.
    avg_error = None

    # Try to get UCC results for comparison (best effort: problems while
    # obtaining or printing them are reported, not raised).
    try:
        ucc_results = run_ucc_on_file(test_file)
        if ucc_results:
            print(f"UCC results: {ucc_results}")

            total_error = 0
            for metric, our_key in metrics_to_compare:
                our_val = our_results.get(our_key, 0)
                ucc_val = ucc_results.get(metric, 0)

                if ucc_val > 0:
                    error = abs(our_val - ucc_val) / ucc_val * 100
                else:
                    # No reference value: exact agreement on zero is 0% error,
                    # anything else counts as a full miss.
                    error = 0 if our_val == 0 else 100

                total_error += error
                print(
                    f"{metric:20s}: Our={our_val:5d}  UCC={ucc_val:5d}  Error={error:5.1f}%"
                )

            avg_error = total_error / len(metrics_to_compare)
            print(f"\nAverage error: {avg_error:.1f}%")
            print(f"Accuracy: {100 - avg_error:.1f}%")
    except Exception as e:
        print(f"Could not compare with UCC: {e}")

    # Assert reasonable accuracy. This lives outside the try/except above so
    # that a failing threshold check is not swallowed by `except Exception`.
    if avg_error is not None:
        assert (
            avg_error < 20
        ), f"Average error {avg_error:.1f}% exceeds 20% threshold"


def test_python_counter_on_multiple_files():
    """Run the counter over several project modules and sanity-check each result."""
    # Candidate modules, given relative to the directory two levels above this file.
    project_root = Path(__file__).parent.parent
    relative_parts = (
        ("pyucc", "core", "countings_impl.py"),
        ("pyucc", "core", "scanner.py"),
        ("pyucc", "core", "differ.py"),
        ("pyucc", "gui", "gui.py"),
        ("pyucc", "utils", "logger.py"),
    )
    candidates = [project_root.joinpath(*parts) for parts in relative_parts]

    collected = []
    # Missing files are skipped; existence is checked lazily, one file at a time.
    for source_path in (path for path in candidates if path.exists()):
        metrics = UCCPythonCounter().analyze_file(source_path)

        print(f"\n{source_path.name}:")
        print(f"  Blank: {metrics['blank_lines']}")
        print(
            f"  Comments (W/E): {metrics['comment_whole']}/{metrics['comment_embedded']}"
        )
        print(f"  Directives: {metrics['compiler_directives']}")
        print(f"  Exec: {metrics['exec_instructions']}")
        print(f"  Logical SLOC: {metrics['logical_sloc']}")
        print(f"  Physical SLOC: {metrics['physical_sloc']}")

        collected.append(metrics)

    assert collected, "Should have tested at least one file"

    # Every analysed file must report plausible values.
    for metrics in collected:
        assert metrics["physical_sloc"] > 0, "Should have physical SLOC"
        assert metrics["logical_sloc"] > 0, "Should have logical SLOC"
        assert metrics["data_declarations"] == 0, "Python has no data declarations"


def test_python_comment_types():
    """Check that whole-line and embedded comments are both detected."""
    import tempfile

    # Sample source mixing whole-line comments, trailing comments and docstrings.
    sample_source = '''
# This is a whole line comment

x = 5  # This is an embedded comment

"""
This is a
multi-line
docstring comment
"""

def foo():
    """This is a single-line docstring"""
    pass

y = 10  # Another embedded

# Another whole line
z = 20
'''

    # delete=False keeps the file on disk after closing so the counter can
    # open it by path; it is removed in the finally block below.
    handle = tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False)
    with handle:
        handle.write(sample_source)
        sample_path = Path(handle.name)

    try:
        metrics = UCCPythonCounter().analyze_file(sample_path)

        print("\nComment test results:")
        print(f"  Whole comments: {metrics['comment_whole']}")
        print(f"  Embedded comments: {metrics['comment_embedded']}")

        # Both comment categories must be present in the sample.
        assert metrics["comment_whole"] > 0, "Should have whole line comments"
        assert metrics["comment_embedded"] > 0, "Should have embedded comments"
    finally:
        sample_path.unlink()


def test_python_directives():
    """Check that import statements are counted as compiler directives."""
    import tempfile

    # Four module-level imports plus one import nested inside a function.
    sample_source = """
import os
import sys
from pathlib import Path
from typing import Dict, List

def foo():
    import json  # This should also be a directive
    pass
"""

    # delete=False keeps the file on disk after closing so the counter can
    # open it by path; it is removed in the finally block below.
    handle = tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False)
    with handle:
        handle.write(sample_source)
        sample_path = Path(handle.name)

    try:
        metrics = UCCPythonCounter().analyze_file(sample_path)

        print("\nDirective test results:")
        print(f"  Directives: {metrics['compiler_directives']}")

        # All import/from statements are expected to count as directives.
        assert metrics["compiler_directives"] >= 4, "Should have at least 4 directives"
    finally:
        sample_path.unlink()


if __name__ == "__main__":
    # Script entry point: run every test in file order, then report success.
    for run_test in (
        test_python_counter_on_real_file,
        test_python_counter_on_multiple_files,
        test_python_comment_types,
        test_python_directives,
    ):
        run_test()
    print("\n✓ All tests passed!")