""" UCC-compatible counter for Assembly files. Implements UCC algorithms for Assembly with the following metrics: - Comment Whole Lines (;, #, |, /* */) - Comment Embedded Lines - Compiler Directives (assembler directives starting with .) - Data Declarations (in .data, .bss sections) - Exec Instructions (in .text sections) - Logical SLOC (instruction count) - Physical SLOC (non-blank, non-comment lines) Assembly has distinct data and code sections (.data/.bss vs .text). """ import re from pathlib import Path from typing import Dict, List class UCCAssemblyCounter: """UCC-compatible counter for Assembly files.""" # Assembly comment markers (auto-detected) COMMENT_MARKERS = ["#", ";", "|"] # Data section markers DATA_SECTION_MARKERS = [ ".data", ".bss", ".const", ".rdata", ".sdata", ".kdata", ".sbss", ".lit", "section .data", "section .bss", ] # Code/text section markers TEXT_SECTION_MARKERS = [ ".text", ".code", "section .text", "section .txt", ".init", ".fini", ".ktext", ] # Directives (assembler commands starting with .) DIRECTIVE_PREFIXES = [".", "%"] def __init__(self): self.results = { "comment_whole": 0, "comment_embedded": 0, "compiler_directives": 0, "data_declarations": 0, "exec_instructions": 0, "logical_sloc": 0, "physical_sloc": 0, "blank_lines": 0, } self.detected_comment_marker = None def analyze_file(self, file_path: Path) -> Dict[str, int]: """Analyze an Assembly file using UCC algorithms.""" try: with open(file_path, "r", encoding="utf-8", errors="ignore") as f: lines = f.readlines() except Exception: return self.results.copy() # Step 1: Count blank lines BEFORE processing self._count_blank_lines(lines) # Step 2: Detect comment marker used in this file self._detect_comment_marker(lines) # Step 3: Count and remove comments processed_lines = self._count_and_remove_comments(lines) # Step 4: Process assembly-specific logic (sections, directives, instructions) self._process_assembly_logic(processed_lines, lines) return self.results.copy() def _count_blank_lines(self, lines: List[str]) -> None: """Count blank lines before processing.""" for line in lines: if not line.strip(): self.results["blank_lines"] += 1 def _detect_comment_marker(self, lines: List[str]) -> None: """ Detect which comment marker (;, #, |) is used in this file. Assembly supports multiple comment styles. """ for line in lines: stripped = line.strip() if not stripped: continue for marker in self.COMMENT_MARKERS: idx = stripped.find(marker) if idx == 0: # Marker at start of line self.detected_comment_marker = marker return elif idx > 0 and stripped[idx - 1] == " ": # Marker after space (inline comment) self.detected_comment_marker = marker return # Default to semicolon if no marker detected self.detected_comment_marker = ";" def _count_and_remove_comments(self, lines: List[str]) -> List[str]: """Count whole and embedded comments, then remove them.""" result = [] in_block = False for line in lines: stripped = line.strip() if not stripped: result.append("") continue # Handle block comments /* */ if in_block: self.results["comment_whole"] += 1 if "*/" in line: idx = line.find("*/") after = line[idx + 2 :].strip() if after: result.append(after) else: result.append("") in_block = False else: result.append("") continue # Check for block comment start if "/*" in line: start_idx = line.find("/*") before = line[:start_idx].strip() end_idx = line.find("*/", start_idx) if end_idx != -1: # Block comment ends on same line after = line[end_idx + 2 :].strip() combined = (before + " " + after).strip() if combined: self.results["comment_embedded"] += 1 result.append(combined) else: self.results["comment_whole"] += 1 result.append("") else: # Multi-line block starts in_block = True if before: self.results["comment_embedded"] += 1 result.append(before) else: self.results["comment_whole"] += 1 result.append("") continue # Handle line comments (detected marker) if self.detected_comment_marker and self.detected_comment_marker in line: idx = line.find(self.detected_comment_marker) before = line[:idx].strip() if before: self.results["comment_embedded"] += 1 result.append(before) else: self.results["comment_whole"] += 1 result.append("") else: result.append(line) return result def _process_assembly_logic( self, processed: List[str], original: List[str] ) -> None: """ Process assembly-specific logic: - Track .data/.bss vs .text sections - Count directives (lines starting with . or %) - Count data declarations vs exec instructions - Handle labels (label: instruction) - Handle continuation lines (ending with \\) """ is_data_section = False # True if in .data/.bss section accumulated = "" continuation = False for proc_line, orig_line in zip(processed, original): stripped = proc_line.strip() if not stripped: continue # Check for line continuation (ends with \) if stripped.endswith("\\"): accumulated += " " + stripped[:-1] continuation = True continue # Complete the statement if continuation: stripped = (accumulated + " " + stripped).strip() accumulated = "" continuation = False # This is a physical SLOC self.results["physical_sloc"] += 1 # Check for section switch lower = stripped.lower() section_changed = False for marker in self.DATA_SECTION_MARKERS: if lower.startswith(marker): is_data_section = True section_changed = True # Section declaration itself is not data, but counted as exec self.results["exec_instructions"] += 1 self.results["logical_sloc"] += 1 break if not section_changed: for marker in self.TEXT_SECTION_MARKERS: if lower.startswith(marker): is_data_section = False section_changed = True # Section declaration counted as exec self.results["exec_instructions"] += 1 self.results["logical_sloc"] += 1 break if section_changed: continue # Check for directives (start with . or %) if any(stripped.startswith(prefix) for prefix in self.DIRECTIVE_PREFIXES): # Skip 'end' directives (endm, endif, etc.) if stripped.lower().startswith((".end", "end", "%end")): continue self.results["compiler_directives"] += 1 self.results["logical_sloc"] += 1 continue # Check for label-only lines (label: with no instruction) if stripped.endswith(":"): continue # Don't count label-only lines # Split by statement separator (;) statements = [s.strip() for s in stripped.split(";") if s.strip()] for statement in statements: if not statement: continue # Skip labels within statement if ":" in statement: parts = statement.split(":", 1) if len(parts) > 1 and parts[1].strip(): statement = parts[1].strip() else: continue # Label only # Count as data or exec based on current section if is_data_section: self.results["data_declarations"] += 1 else: self.results["exec_instructions"] += 1 self.results["logical_sloc"] += 1 # Handle incomplete statement at EOF if accumulated.strip(): if is_data_section: self.results["data_declarations"] += 1 else: self.results["exec_instructions"] += 1 self.results["logical_sloc"] += 1