"""
UCC-style report generator for PyUcc.

Generates reports in UCC format for:
- Scan results
- Counting results
- Differ results
- Duplicates results
- Metrics results
"""

from pathlib import Path
from typing import Dict, List, Any, Optional
from datetime import datetime
import os


class UCCReportGenerator:
    """Generates UCC-style text reports.

    All methods are static; the class is only a namespace. The public entry
    points (``generate_*_report``) build a list of text lines and write them,
    joined by newlines, to ``output_path`` as UTF-8.
    """

    # ------------------------------------------------------------------
    # Shared private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _risk_level(cc: float) -> str:
        """Map a cyclomatic-complexity value to its risk label.

        Thresholds: <=10 "Low", <=20 "Medium", <=50 "High", else "Very High".
        """
        if cc <= 10:
            return "Low"
        if cc <= 20:
            return "Medium"
        if cc <= 50:
            return "High"
        return "Very High"

    @staticmethod
    def _relative_to_base(file_path: Any, base_path: str) -> Any:
        """Return ``file_path`` relative to ``base_path`` when possible.

        Best effort: when ``base_path`` is empty, or ``file_path`` does not
        live under it (``ValueError``), or is not path-like (``TypeError``),
        the value is returned unchanged.
        """
        if not base_path:
            return file_path
        try:
            return str(Path(file_path).relative_to(base_path))
        except (ValueError, TypeError):
            return file_path

    @staticmethod
    def _write_report(output_path: Path, report_lines: List[str]) -> None:
        """Write ``report_lines`` joined by newlines to ``output_path`` (UTF-8).

        Missing parent directories are created. ``output_path`` may also be a
        plain string path.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(report_lines))

    # ------------------------------------------------------------------
    # Formatting helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _format_header(command_description: str) -> str:
        """Generate PyUcc-style header.

        Args:
            command_description: Text placed on its own line in the header.

        Returns:
            The banner (rule, title, generation timestamp from the local
            clock, description, rule, blank) as one newline-joined string.
        """
        now = datetime.now()
        date_str = now.strftime("%d %m %Y")
        time_str = now.strftime("%H:%M:%S")

        header = [
            "=" * 100,
            "",
            " " * 32 + "SLOC COUNT RESULTS",
            " " * 25 + f"Generated by PyUcc on {date_str} at {time_str}",
            f"{command_description}",
            "=" * 100,
            "",
        ]
        return "\n".join(header)

    @staticmethod
    def _format_counting_table_header(language: str = "ALL") -> str:
        """Generate counting table header.

        Args:
            language: Label interpolated into the "RESULTS FOR ... FILES" title.

        Returns:
            Title, explanatory notes, the two column-heading lines and a
            125-character rule, joined by newlines.
        """
        lines = [
            " " * 35 + f"RESULTS FOR {language} FILES",
            "",
            "NOTE: Total Lines = all lines in file | Blank Lines = empty lines",
            " Comments (Whole) = comment-only lines | Comments (Embedded) = inline comments",
            " Compiler Directives = preprocessor commands (#include, #define, etc.)",
            " Data Declarations = variable/type declarations | Exec. Instructions = executable code",
            " Logical SLOC = statements | Physical SLOC = lines of code (excluding blank/comments)",
            "",
            " Total Blank | Comments | Compiler Data Exec. | Logical Physical | File Module",
            " Lines Lines | Whole Embedded | Direct. Decl. Instr. | SLOC SLOC | Type Name",
            "-" * 100 + "-" * 25,
        ]
        return "\n".join(lines)

    @staticmethod
    def _format_counting_row(result: Dict[str, Any], base_path: str = "") -> str:
        """Format a single counting result row in UCC style.

        Args:
            result: Counting result. Keys read (each defaulting to 0):
                physical_lines, blank_lines, comment_whole, comment_embedded,
                compiler_directives, data_declarations, exec_instructions,
                logical_sloc, physical_sloc. The file name is read from
                "file", falling back to "path", then "".
            base_path: When non-empty, the file name is shown relative to it.

        Returns:
            One row with right-aligned integer columns followed by the file.
        """
        total = result.get("physical_lines", 0)
        blank = result.get("blank_lines", 0)
        comment_whole = result.get("comment_whole", 0)
        comment_embed = result.get("comment_embedded", 0)
        directives = result.get("compiler_directives", 0)
        data_decl = result.get("data_declarations", 0)
        exec_inst = result.get("exec_instructions", 0)
        logical = result.get("logical_sloc", 0)
        physical = result.get("physical_sloc", 0)

        # Get file path (relative to base if provided)
        file_path = result.get("file", result.get("path", ""))
        if base_path and file_path:
            try:
                file_path = os.path.relpath(file_path, base_path)
            except (ValueError, TypeError):
                # Best effort: relpath raises ValueError e.g. on Windows when
                # the two paths are on different drives; keep the raw path.
                pass

        # Format: align numbers right
        return (
            f"{total:8d} {blank:7d} | "
            f"{comment_whole:7d} {comment_embed:8d} | "
            f"{directives:7d} {data_decl:6d} {exec_inst:7d} | "
            f"{logical:7d} {physical:9d} | "
            f"CODE {file_path}"
        )

    @staticmethod
    def _format_summary(results: List[Dict[str, Any]]) -> str:
        """Generate summary section.

        Args:
            results: Counting results (same keys as ``_format_counting_row``).

        Returns:
            "" when ``results`` is empty; otherwise the summary table with
            one "Physical" and one "Logical" totals row, the file count and
            the physical/logical SLOC ratio (0.00 when logical SLOC is 0).
        """
        if not results:
            return ""

        def total_of(key: str) -> int:
            # Missing keys count as 0, matching the per-row formatting.
            return sum(r.get(key, 0) for r in results)

        total_lines = total_of("physical_lines")
        total_blank = total_of("blank_lines")
        total_cmt_whole = total_of("comment_whole")
        total_cmt_embed = total_of("comment_embedded")
        total_directives = total_of("compiler_directives")
        total_data = total_of("data_declarations")
        total_exec = total_of("exec_instructions")
        total_logical = total_of("logical_sloc")
        total_physical = total_of("physical_sloc")

        # Guard against division by zero when nothing logical was counted.
        ratio = total_physical / total_logical if total_logical > 0 else 0.0

        # Both totals rows share every column except the SLOC figure/label.
        shared_columns = (
            f"{total_lines:8d} {total_blank:7d} | "
            f"{total_cmt_whole:7d} {total_cmt_embed:8d} | "
            f"{total_directives:7d} {total_data:6d} {total_exec:7d} | "
        )

        lines = [
            "",
            " " * 40 + "RESULTS SUMMARY",
            "",
            " Total Blank | Comments | Compiler Data Exec. | | File SLOC",
            " Lines Lines | Whole Embedded | Direct. Decl. Instr. | SLOC | Type Definition",
            "-" * 100,
            shared_columns + f"{total_physical:7d} | CODE Physical",
            shared_columns + f"{total_logical:7d} | CODE Logical",
            "",
            f"Number of files successfully accessed........................ {len(results):6d} out of {len(results):6d}",
            "",
            f"Ratio of Physical to Logical SLOC............................ {ratio:.2f}",
            "",
        ]
        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Public report generators
    # ------------------------------------------------------------------

    @staticmethod
    def generate_counting_report(
        results: List[Dict[str, Any]],
        output_path: Path,
        command_description: str = "",
        base_path: str = "",
        language_filter: Optional[str] = None,
    ) -> None:
        """
        Generate UCC-style counting report.

        Args:
            results: List of counting results
            output_path: Path to save report
            command_description: Description of command run
            base_path: Base path for relative file paths
            language_filter: Optional language to filter by (case-insensitive
                match against each result's "language" key)
        """
        # Filter by language if specified
        if language_filter:
            wanted = language_filter.lower()
            results = [r for r in results if r.get("language", "").lower() == wanted]

        # Group by language (upper-cased; results without one go to "UNKNOWN")
        by_language: Dict[str, List[Dict[str, Any]]] = {}
        for r in results:
            lang = r.get("language", "unknown").upper()
            by_language.setdefault(lang, []).append(r)

        # Build report
        report_lines = [UCCReportGenerator._format_header(command_description), ""]

        # One table per language, in alphabetical order
        for lang in sorted(by_language):
            report_lines.append(UCCReportGenerator._format_counting_table_header(lang))
            report_lines.extend(
                UCCReportGenerator._format_counting_row(result, base_path)
                for result in by_language[lang]
            )
            report_lines.append("")

        # Add summary (computed over the filtered results)
        report_lines.append(UCCReportGenerator._format_summary(results))

        UCCReportGenerator._write_report(output_path, report_lines)

    @staticmethod
    def generate_differ_report(
        diff_results: List[Dict[str, Any]],
        output_path: Path,
        baseline_id: str,
        command_description: str = "",
    ) -> None:
        """
        Generate UCC-style differ report showing Baseline-A vs Baseline-B.

        Args:
            diff_results: List of differ results. Keys read per entry:
                modified / added / deleted (counts deciding the status),
                fileA, fileB, baseline_countings, current_countings
                (dicts with "physical_lines") and countings_delta (dict with
                "code_lines", "comment_lines", "blank_lines").
            output_path: Path to save report
            baseline_id: Baseline identifier
            command_description: Description of command run
        """
        report_lines = [
            UCCReportGenerator._format_header(command_description),
            "",
            " " * 30 + "DIFFERENTIAL RESULTS",
            "",
            "NOTE: This report compares Baseline-A (previous) vs Baseline-B (current)",
            " MODIFIED = files changed between baselines",
            " ADDED = files added in Baseline-B | DELETED = files removed from Baseline-A",
            " Delta = change in Physical SLOC (positive = code added, negative = code removed)",
            "",
            " " * 25 + f"Baseline-A: {baseline_id}",
            " " * 25 + "Baseline-B: Current",
            "",
            " File Status | Baseline-A | Baseline-B | Delta Lines",
            " | (Physical) | (Physical) | (Code/Cmt/Blank)",
            "-" * 80,
        ]

        for result in diff_results:
            # First positive counter wins, in this priority order.
            if result.get("modified", 0) > 0:
                status = "MODIFIED"
            elif result.get("added", 0) > 0:
                status = "ADDED"
            elif result.get("deleted", 0) > 0:
                status = "DELETED"
            else:
                status = "UNCHANGED"

            baseline_file = result.get("fileA", "")
            current_file = result.get("fileB", "")

            # Any of these may be missing or None (e.g. for added/deleted
            # files); treat that as "all zeros".
            baseline_counts = result.get("baseline_countings") or {}
            current_counts = result.get("current_countings") or {}
            deltas = result.get("countings_delta") or {}

            baseline_phys = baseline_counts.get("physical_lines", 0)
            current_phys = current_counts.get("physical_lines", 0)
            delta_code = deltas.get("code_lines", 0)
            delta_comment = deltas.get("comment_lines", 0)
            delta_blank = deltas.get("blank_lines", 0)

            # Prefer the current file name; fall back to the baseline one.
            file_name = current_file or baseline_file

            report_lines.append(
                f"{status:15s} | {baseline_phys:10d} | {current_phys:10d} | "
                f"{delta_code:+6d}/{delta_comment:+6d}/{delta_blank:+6d} {file_name}"
            )

        report_lines.append("")

        UCCReportGenerator._write_report(output_path, report_lines)

    @staticmethod
    def generate_duplicates_report(
        duplicates: List[Dict[str, Any]],
        output_path: Path,
        command_description: str = "",
        base_path: str = "",
        params: Optional[Dict[str, Any]] = None,
    ) -> None:
        """
        Generate a UCC-style duplicates report.

        Args:
            duplicates: list of dicts with keys: file_a, file_b, match_type, pct_change.
                Only entries whose match_type is "exact" or "fuzzy" are listed.
            output_path: path to save report
            command_description: optional description line
            base_path: optional base path to relativize file paths
            params: optional search parameters echoed into the report; keys
                read: threshold, extensions, k, window
        """
        relativize = UCCReportGenerator._relative_to_base

        report_lines: List[str] = [
            UCCReportGenerator._format_header(command_description),
            "",
            " " * 35 + "DUPLICATE FILES REPORT",
            "",
            "NOTE: Exact duplicates are byte-identical (after normalization).",
            " Fuzzy duplicates are similar files within the configured threshold.",
        ]

        if params:
            # include parameters used for reproducibility
            report_lines.extend(
                [
                    "",
                    "Search parameters:",
                    f" Threshold: {params.get('threshold')}",
                    f" Extensions: {params.get('extensions')}",
                    f" Fingerprint k: {params.get('k')}",
                    f" Winnowing window: {params.get('window')}",
                ]
            )

        report_lines.append("")

        # Separate exact and fuzzy
        exact = [d for d in duplicates if d.get("match_type") == "exact"]
        fuzzy = [d for d in duplicates if d.get("match_type") == "fuzzy"]

        report_lines.append(f"Exact duplicates: {len(exact)}")
        report_lines.append("" if exact else "No exact duplicates found.")
        if exact:
            report_lines.append("\nExact duplicate pairs:\n")
            report_lines.append(" File A" + " " * 4 + "| File B")
            report_lines.append("-" * 100)
            for d in exact:
                a = relativize(d.get("file_a", ""), base_path)
                b = relativize(d.get("file_b", ""), base_path)
                report_lines.append(f"{a} | {b}")

        report_lines.append("")
        report_lines.append(f"Fuzzy duplicates (threshold): {len(fuzzy)}")
        report_lines.append("" if fuzzy else "No fuzzy duplicates found.")
        if fuzzy:
            report_lines.append(
                "\nFuzzy duplicate pairs (pct_change = approximate % lines changed):\n"
            )
            report_lines.append("Pct | File A" + " " * 2 + "| File B")
            report_lines.append("-" * 100)
            for d in fuzzy:
                pct = d.get("pct_change", "")
                a = relativize(d.get("file_a", ""), base_path)
                b = relativize(d.get("file_b", ""), base_path)
                report_lines.append(f"{str(pct):>4} | {a} | {b}")

        # summary
        report_lines.append("")
        report_lines.append(
            f"Total duplicate pairs (exact + fuzzy): {len(exact) + len(fuzzy)}"
        )

        UCCReportGenerator._write_report(output_path, report_lines)

    @staticmethod
    def generate_metrics_report(
        results: List[Dict[str, Any]],
        output_path: Path,
        command_description: str = "",
        base_path: str = "",
    ) -> None:
        """
        Generate UCC-style metrics report (Cyclomatic Complexity).

        Args:
            results: List of metrics results. Keys read per entry: file,
                avg_cc, func_count, mi, and functions (list of dicts with
                "name" and "cc").
            output_path: Path to save report
            command_description: Description of command run
            base_path: Base path for relative file paths
        """
        relativize = UCCReportGenerator._relative_to_base
        risk_level = UCCReportGenerator._risk_level

        file_rule = "-" * 56 + "+" + "-" * 50
        func_rule = "-" * 98 + "+" + "-" * 23

        report_lines = [
            UCCReportGenerator._format_header(command_description),
            "",
            " " * 35 + "CYCLOMATIC COMPLEXITY RESULTS",
            "",
            "NOTE: CC1 = McCabe Cyclomatic Complexity (Standard) - measures code complexity",
            " Total_CC = sum of complexity for all functions in file",
            " Average_CC = average complexity per function",
            " Risk: Low (≤10) | Medium (≤20) | High (≤50) | Very High (>50)",
            " MI = Maintainability Index (0-100): higher is better",
            " 85-100 = Excellent | 65-84 = Good | 0-64 = Needs attention",
            "",
            " " * 40 + "RESULTS BY FILE",
            "",
            "Cyclomatic Complexity and Maintainability Index",
            " Total_CC Average_CC Risk MI | File Name",
            file_rule,
        ]

        total_cc = 0
        total_funcs = 0

        for result in results:
            file_path = relativize(result.get("file", ""), base_path)

            avg_cc = result.get("avg_cc", 0.0)
            func_count = result.get("func_count", 0)
            mi = result.get("mi", 0.0)

            # Total CC is reconstructed as avg * count. Round instead of
            # truncating: float error or an already-rounded average (e.g.
            # 2.33 * 3 = 6.99) must still yield the true integer total (7).
            file_total_cc = round(avg_cc * func_count) if func_count > 0 else 0
            total_cc += file_total_cc
            total_funcs += func_count

            # Risk level of a file is based on its average CC
            risk = risk_level(avg_cc)

            report_lines.append(
                f"{file_total_cc:10d} {avg_cc:11.2f} {risk:11s} {mi:6.2f} | {file_path}"
            )

        report_lines.append(file_rule)

        # Overall averages (all guarded against empty input)
        overall_avg = total_cc / total_funcs if total_funcs > 0 else 0.0
        avg_funcs_per_file = total_funcs / len(results) if results else 0.0
        avg_mi = (
            sum(r.get("mi", 0.0) for r in results) / len(results) if results else 0.0
        )

        report_lines.append(
            f"{total_cc:10d} {overall_avg:11.2f} {avg_mi:6.2f} Totals | {total_funcs} Functions in {len(results)} File(s)"
        )
        report_lines.append(
            f"{'':10s} {avg_funcs_per_file:11.1f} {'':6s} Averages | {avg_funcs_per_file:.1f} Functions per File (Averages = Totals/Functions)"
        )

        # Add RESULTS BY FUNCTION section
        report_lines.extend(
            [
                "",
                "",
                " " * 40 + "RESULTS BY FUNCTION",
                "",
                "Cyclomatic Complexity (CC1 = McCabe Standard)",
                " CC1 Risk Function Name" + " " * 50 + "| File Name",
                func_rule,
            ]
        )

        # Collect all functions across all files
        all_functions = []
        for result in results:
            file_path = relativize(result.get("file", ""), base_path)
            for func in result.get("functions", []):
                cc = func.get("cc", 0)
                all_functions.append(
                    {
                        "name": func.get("name", "unknown"),
                        "cc": cc,
                        "risk": risk_level(cc),
                        "file": file_path,
                    }
                )

        # Sort functions by CC descending (most complex first)
        all_functions.sort(key=lambda x: x["cc"], reverse=True)

        # Write function details
        for func in all_functions:
            func_name_trunc = func["name"][:60]  # Limit function name length
            report_lines.append(
                f"{func['cc']:10d} {func['risk']:11s} {func_name_trunc:60s} | {func['file']}"
            )

        report_lines.append(func_rule)
        report_lines.append(
            f"{total_cc:10d} Totals {total_funcs} Functions"
            + " " * 50
            + f"| {len(results)} File(s)"
        )
        report_lines.append(
            f"{overall_avg:10.2f} Averages {avg_funcs_per_file:.1f} Functions per File (Averages = Totals/Functions)"
            + " " * 6
            + "|"
        )

        # If no functions were collected, add a note explaining function-level
        # details may be missing
        if total_funcs == 0:
            report_lines.append("")
            report_lines.append(
                "NOTE: No functions were detected in the analyzed files."
            )
            report_lines.append(
                "If you expect per-function complexity, ensure the optional dependency 'lizard' is installed in your environment."
            )
            report_lines.append("Install with: pip install lizard")

        report_lines.append("")
        report_lines.append("")
        report_lines.append("")

        UCCReportGenerator._write_report(output_path, report_lines)