# SXXXXXXX_PyUCC/pyucc/utils/ucc_report_generator.py

"""
UCC-style report generator for PyUcc.

Generates reports in UCC format for:
- Scan results
- Counting results
- Differ results
- Duplicate-file results
- Metrics results
"""

from pathlib import Path
from typing import Dict, List, Any, Optional
from datetime import datetime
import os


class UCCReportGenerator:
    """Generates UCC-style text reports."""

    @staticmethod
    def _format_header(command_description: str) -> str:
        """Generate PyUcc-style header."""
        now = datetime.now()
        date_str = now.strftime("%d %m %Y")
        time_str = now.strftime("%H:%M:%S")

        header = []
        header.append("=" * 100)
        header.append("")
        header.append(" " * 32 + "SLOC COUNT RESULTS")
        header.append(" " * 25 + f"Generated by PyUcc on {date_str} at {time_str}")
        header.append(f"{command_description}")
        header.append("=" * 100)
        header.append("")

        return "\n".join(header)

    @staticmethod
    def _format_counting_table_header(language: str = "ALL") -> str:
        """Generate counting table header."""
        lines = []
        lines.append(" " * 35 + f"RESULTS FOR {language} FILES")
        lines.append("")
        lines.append(
            "NOTE: Total Lines = all lines in file | Blank Lines = empty lines"
        )
        lines.append(
            "      Comments (Whole) = comment-only lines | Comments (Embedded) = inline comments"
        )
        lines.append(
            "      Compiler Directives = preprocessor commands (#include, #define, etc.)"
        )
        lines.append(
            "      Data Declarations = variable/type declarations | Exec. Instructions = executable code"
        )
        lines.append(
            "      Logical SLOC = statements | Physical SLOC = lines of code (excluding blank/comments)"
        )
        lines.append("")
        lines.append(
            "   Total   Blank |      Comments    | Compiler  Data   Exec.  | Logical Physical | File  Module"
        )
        lines.append(
            "   Lines   Lines |   Whole Embedded | Direct.   Decl.  Instr. |   SLOC    SLOC   | Type  Name"
        )
        lines.append("-" * 100 + "-" * 25)

        return "\n".join(lines)

    @staticmethod
    def _format_counting_row(result: Dict[str, Any], base_path: str = "") -> str:
        """Format a single counting result row in UCC style."""
        # Extract values
        total = result.get("physical_lines", 0)
        blank = result.get("blank_lines", 0)
        comment_whole = result.get("comment_whole", 0)
        comment_embed = result.get("comment_embedded", 0)
        directives = result.get("compiler_directives", 0)
        data_decl = result.get("data_declarations", 0)
        exec_inst = result.get("exec_instructions", 0)
        logical = result.get("logical_sloc", 0)
        physical = result.get("physical_sloc", 0)

        # Get file path (relative to base if provided)
        file_path = result.get("file", result.get("path", ""))
        if base_path and file_path:
            try:
                file_path = os.path.relpath(file_path, base_path)
            except:
                pass

        # Format: align numbers right
        row = (
            f"{total:8d} {blank:7d} | "
            f"{comment_whole:7d} {comment_embed:8d} | "
            f"{directives:7d} {data_decl:6d} {exec_inst:7d} | "
            f"{logical:7d} {physical:9d} | "
            f"CODE  {file_path}"
        )

        return row

    @staticmethod
    def _format_summary(results: List[Dict[str, Any]]) -> str:
        """Generate summary section."""
        if not results:
            return ""

        # Calculate totals
        total_lines = sum(r.get("physical_lines", 0) for r in results)
        total_blank = sum(r.get("blank_lines", 0) for r in results)
        total_cmt_whole = sum(r.get("comment_whole", 0) for r in results)
        total_cmt_embed = sum(r.get("comment_embedded", 0) for r in results)
        total_directives = sum(r.get("compiler_directives", 0) for r in results)
        total_data = sum(r.get("data_declarations", 0) for r in results)
        total_exec = sum(r.get("exec_instructions", 0) for r in results)
        total_logical = sum(r.get("logical_sloc", 0) for r in results)
        total_physical = sum(r.get("physical_sloc", 0) for r in results)

        # Calculate ratio
        ratio = total_physical / total_logical if total_logical > 0 else 0.0

        lines = []
        lines.append("")
        lines.append(" " * 40 + "RESULTS SUMMARY")
        lines.append("")
        lines.append(
            "   Total   Blank |      Comments    | Compiler  Data   Exec.  |         | File  SLOC"
        )
        lines.append(
            "   Lines   Lines |   Whole Embedded | Direct.   Decl.  Instr. |   SLOC  | Type  Definition"
        )
        lines.append("-" * 100)

        # Physical SLOC row
        lines.append(
            f"{total_lines:8d} {total_blank:7d} | "
            f"{total_cmt_whole:7d} {total_cmt_embed:8d} | "
            f"{total_directives:7d} {total_data:6d} {total_exec:7d} | "
            f"{total_physical:7d} | CODE  Physical"
        )

        # Logical SLOC row
        lines.append(
            f"{total_lines:8d} {total_blank:7d} | "
            f"{total_cmt_whole:7d} {total_cmt_embed:8d} | "
            f"{total_directives:7d} {total_data:6d} {total_exec:7d} | "
            f"{total_logical:7d} | CODE  Logical"
        )

        lines.append("")
        lines.append(
            f"Number of files successfully accessed........................ {len(results):6d} out of {len(results):6d}"
        )
        lines.append("")
        lines.append(
            f"Ratio of Physical to Logical SLOC............................     {ratio:.2f}"
        )
        lines.append("")

        return "\n".join(lines)

    @staticmethod
    def generate_counting_report(
        results: List[Dict[str, Any]],
        output_path: Path,
        command_description: str = "",
        base_path: str = "",
        language_filter: Optional[str] = None,
    ) -> None:
        """
        Generate UCC-style counting report.

        Args:
            results: List of counting results
            output_path: Path to save report
            command_description: Description of command run
            base_path: Base path for relative file paths
            language_filter: Optional language to filter by
        """
        # Filter by language if specified
        if language_filter:
            results = [
                r
                for r in results
                if r.get("language", "").lower() == language_filter.lower()
            ]

        # Group by language
        by_language = {}
        for r in results:
            lang = r.get("language", "unknown").upper()
            if lang not in by_language:
                by_language[lang] = []
            by_language[lang].append(r)

        # Build report
        report_lines = []
        report_lines.append(UCCReportGenerator._format_header(command_description))
        report_lines.append("")

        # Add sections for each language
        for lang, lang_results in sorted(by_language.items()):
            report_lines.append(UCCReportGenerator._format_counting_table_header(lang))

            for result in lang_results:
                report_lines.append(
                    UCCReportGenerator._format_counting_row(result, base_path)
                )

            report_lines.append("")

        # Add summary
        report_lines.append(UCCReportGenerator._format_summary(results))

        # Write to file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report_lines))

    @staticmethod
    def generate_differ_report(
        diff_results: List[Dict[str, Any]],
        output_path: Path,
        baseline_id: str,
        command_description: str = "",
    ) -> None:
        """
        Generate UCC-style differ report showing Baseline-A vs Baseline-B.

        Args:
            diff_results: List of differ results
            output_path: Path to save report
            baseline_id: Baseline identifier
            command_description: Description of command run
        """
        report_lines = []
        report_lines.append(UCCReportGenerator._format_header(command_description))
        report_lines.append("")
        report_lines.append(" " * 30 + "DIFFERENTIAL RESULTS")
        report_lines.append("")
        report_lines.append(
            "NOTE: This report compares Baseline-A (previous) vs Baseline-B (current)"
        )
        report_lines.append("      MODIFIED = files changed between baselines")
        report_lines.append(
            "      ADDED = files added in Baseline-B | DELETED = files removed from Baseline-A"
        )
        report_lines.append(
            "      Delta = change in Physical SLOC (positive = code added, negative = code removed)"
        )
        report_lines.append("")
        report_lines.append(" " * 25 + f"Baseline-A: {baseline_id}")
        report_lines.append(" " * 25 + "Baseline-B: Current")
        report_lines.append("")
        report_lines.append("   File Status | Baseline-A | Baseline-B | Delta Lines")
        report_lines.append(
            "               | (Physical) | (Physical) | (Code/Cmt/Blank)"
        )
        report_lines.append("-" * 80)

        for result in diff_results:
            status = (
                "MODIFIED"
                if result.get("modified", 0) > 0
                else (
                    "ADDED"
                    if result.get("added", 0) > 0
                    else "DELETED" if result.get("deleted", 0) > 0 else "UNCHANGED"
                )
            )

            baseline_file = result.get("fileA", "")
            current_file = result.get("fileB", "")

            # Get metrics
            baseline_counts = result.get("baseline_countings", {})
            current_counts = result.get("current_countings", {})
            deltas = result.get("countings_delta", {})

            baseline_phys = (
                baseline_counts.get("physical_lines", 0) if baseline_counts else 0
            )
            current_phys = (
                current_counts.get("physical_lines", 0) if current_counts else 0
            )

            delta_code = deltas.get("code_lines", 0) if deltas else 0
            delta_comment = deltas.get("comment_lines", 0) if deltas else 0
            delta_blank = deltas.get("blank_lines", 0) if deltas else 0

            file_name = current_file or baseline_file

            report_lines.append(
                f"{status:15s} | {baseline_phys:10d} | {current_phys:10d} | "
                f"{delta_code:+6d}/{delta_comment:+6d}/{delta_blank:+6d}  {file_name}"
            )

        report_lines.append("")

        # Write to file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report_lines))

    @staticmethod
    def generate_duplicates_report(
        duplicates: List[Dict[str, Any]],
        output_path: Path,
        command_description: str = "",
        base_path: str = "",
        params: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Generate a UCC-style duplicates report.

        Args:
            duplicates: list of dicts with keys: file_a, file_b, match_type, pct_change
            output_path: path to save report
            command_description: optional description line
            base_path: optional base path to relativize file paths
        """
        report_lines: List[str] = []
        report_lines.append(UCCReportGenerator._format_header(command_description))
        report_lines.append("")
        report_lines.append(" " * 35 + "DUPLICATE FILES REPORT")
        report_lines.append("")
        report_lines.append(
            "NOTE: Exact duplicates are byte-identical (after normalization)."
        )
        report_lines.append(
            "      Fuzzy duplicates are similar files within the configured threshold."
        )
        if params:
            # include parameters used for reproducibility
            report_lines.append("")
            report_lines.append("Search parameters:")
            thr = params.get("threshold")
            exts = params.get("extensions")
            k = params.get("k")
            window = params.get("window")
            report_lines.append(f"  Threshold: {thr}")
            report_lines.append(f"  Extensions: {exts}")
            report_lines.append(f"  Fingerprint k: {k}")
            report_lines.append(f"  Winnowing window: {window}")
        report_lines.append("")

        # Separate exact and fuzzy
        exact = [d for d in duplicates if d.get("match_type") == "exact"]
        fuzzy = [d for d in duplicates if d.get("match_type") == "fuzzy"]

        report_lines.append(f"Exact duplicates: {len(exact)}")
        report_lines.append("" if exact else "No exact duplicates found.")
        if exact:
            report_lines.append("\nExact duplicate pairs:\n")
            report_lines.append("  File A" + " " * 4 + "| File B")
            report_lines.append("-" * 100)
            for d in exact:
                a = d.get("file_a", "")
                b = d.get("file_b", "")
                if base_path:
                    try:
                        a = str(Path(a).relative_to(base_path))
                    except Exception:
                        pass
                    try:
                        b = str(Path(b).relative_to(base_path))
                    except Exception:
                        pass
                report_lines.append(f"{a} | {b}")

        report_lines.append("")
        report_lines.append(f"Fuzzy duplicates (threshold): {len(fuzzy)}")
        report_lines.append("" if fuzzy else "No fuzzy duplicates found.")
        if fuzzy:
            report_lines.append(
                "\nFuzzy duplicate pairs (pct_change = approximate % lines changed):\n"
            )
            report_lines.append("Pct  | File A" + " " * 2 + "| File B")
            report_lines.append("-" * 100)
            for d in fuzzy:
                pct = d.get("pct_change", "")
                a = d.get("file_a", "")
                b = d.get("file_b", "")
                if base_path:
                    try:
                        a = str(Path(a).relative_to(base_path))
                    except Exception:
                        pass
                    try:
                        b = str(Path(b).relative_to(base_path))
                    except Exception:
                        pass
                report_lines.append(f"{str(pct):>4} | {a} | {b}")

        # summary
        report_lines.append("")
        report_lines.append(
            f"Total duplicate pairs (exact + fuzzy): {len(exact) + len(fuzzy)}"
        )

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report_lines))

    @staticmethod
    def generate_metrics_report(
        results: List[Dict[str, Any]],
        output_path: Path,
        command_description: str = "",
        base_path: str = "",
    ) -> None:
        """
        Generate UCC-style metrics report (Cyclomatic Complexity).

        Args:
            results: List of metrics results
            output_path: Path to save report
            command_description: Description of command run
            base_path: Base path for relative file paths
        """
        report_lines = []
        report_lines.append(UCCReportGenerator._format_header(command_description))
        report_lines.append("")
        report_lines.append(" " * 35 + "CYCLOMATIC COMPLEXITY RESULTS")
        report_lines.append("")
        report_lines.append(
            "NOTE: CC1 = McCabe Cyclomatic Complexity (Standard) - measures code complexity"
        )
        report_lines.append(
            "      Total_CC = sum of complexity for all functions in file"
        )
        report_lines.append("      Average_CC = average complexity per function")
        report_lines.append(
            "      Risk: Low (≤10) | Medium (≤20) | High (≤50) | Very High (>50)"
        )
        report_lines.append(
            "      MI = Maintainability Index (0-100): higher is better"
        )
        report_lines.append(
            "           85-100 = Excellent | 65-84 = Good | 0-64 = Needs attention"
        )
        report_lines.append("")
        report_lines.append(" " * 40 + "RESULTS BY FILE")
        report_lines.append("")
        report_lines.append("Cyclomatic Complexity and Maintainability Index")
        report_lines.append(" Total_CC  Average_CC  Risk             MI  |   File Name")
        report_lines.append("-" * 56 + "+" + "-" * 50)

        total_cc = 0
        total_funcs = 0

        for result in results:
            file_path = result.get("file", "")
            if base_path:
                try:
                    file_path = str(Path(file_path).relative_to(base_path))
                except ValueError:
                    pass

            # Get metrics
            avg_cc = result.get("avg_cc", 0.0)
            func_count = result.get("func_count", 0)
            max_cc = result.get("max_cc", 0)
            mi = result.get("mi", 0.0)

            # Calculate total CC (avg * func_count)
            file_total_cc = int(avg_cc * func_count) if func_count > 0 else 0
            total_cc += file_total_cc
            total_funcs += func_count

            # Determine risk level based on average CC
            if avg_cc <= 10:
                risk = "Low"
            elif avg_cc <= 20:
                risk = "Medium"
            elif avg_cc <= 50:
                risk = "High"
            else:
                risk = "Very High"

            report_lines.append(
                f"{file_total_cc:10d} {avg_cc:11.2f}  {risk:11s} {mi:6.2f}  |   {file_path}"
            )

        report_lines.append("-" * 56 + "+" + "-" * 50)

        # Overall average
        overall_avg = total_cc / total_funcs if total_funcs > 0 else 0.0
        avg_funcs_per_file = total_funcs / len(results) if results else 0.0
        avg_mi = (
            sum(r.get("mi", 0.0) for r in results) / len(results) if results else 0.0
        )
        report_lines.append(
            f"{total_cc:10d} {overall_avg:11.2f}              {avg_mi:6.2f}  Totals      |   {total_funcs} Functions in {len(results)} File(s)"
        )
        report_lines.append(
            f"{'':10s} {avg_funcs_per_file:11.1f}              {'':6s}  Averages    |   {avg_funcs_per_file:.1f} Functions per File   (Averages = Totals/Functions)"
        )

        # Add RESULTS BY FUNCTION section
        report_lines.append("")
        report_lines.append("")
        report_lines.append(" " * 40 + "RESULTS BY FUNCTION")
        report_lines.append("")
        report_lines.append("Cyclomatic Complexity (CC1 = McCabe Standard)")
        report_lines.append(
            "       CC1   Risk        Function Name" + " " * 50 + "|   File Name"
        )
        report_lines.append("-" * 98 + "+" + "-" * 23)

        # Collect all functions across all files
        all_functions = []
        for result in results:
            file_path = result.get("file", "")
            if base_path:
                try:
                    file_path = str(Path(file_path).relative_to(base_path))
                except ValueError:
                    pass

            functions = result.get("functions", [])
            for func in functions:
                func_name = func.get("name", "unknown")
                cc = func.get("cc", 0)

                # Determine risk level
                if cc <= 10:
                    risk = "Low"
                elif cc <= 20:
                    risk = "Medium"
                elif cc <= 50:
                    risk = "High"
                else:
                    risk = "Very High"

                all_functions.append(
                    {"name": func_name, "cc": cc, "risk": risk, "file": file_path}
                )

        # Sort functions by CC descending (most complex first)
        all_functions.sort(key=lambda x: x["cc"], reverse=True)

        # Write function details
        for func in all_functions:
            func_name_trunc = func["name"][:60]  # Limit function name length
            report_lines.append(
                f"{func['cc']:10d}   {func['risk']:11s} {func_name_trunc:60s} |   {func['file']}"
            )

        report_lines.append("-" * 98 + "+" + "-" * 23)
        report_lines.append(
            f"{total_cc:10d}   Totals      {total_funcs} Functions"
            + " " * 50
            + f"|   {len(results)} File(s)"
        )
        report_lines.append(
            f"{overall_avg:10.2f}   Averages    {avg_funcs_per_file:.1f} Functions per File   (Averages = Totals/Functions)"
            + " " * 6
            + "|"
        )

        # If no functions were collected, add a note explaining function-level details may be missing
        if total_funcs == 0:
            report_lines.append("")
            report_lines.append(
                "NOTE: No functions were detected in the analyzed files."
            )
            report_lines.append(
                "If you expect per-function complexity, ensure the optional dependency 'lizard' is installed in your environment."
            )
            report_lines.append("Install with: pip install lizard")
            report_lines.append("")

        report_lines.append("")
        report_lines.append("")

        # Write to file
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report_lines))