""" UCC-compatible counter for Assembly files. Implements UCC algorithms for Assembly with the following metrics: - Comment Whole Lines (;, #, |, /* */) - Comment Embedded Lines - Compiler Directives (assembler directives starting with .) - Data Declarations (in .data, .bss sections) - Exec Instructions (in .text sections) - Logical SLOC (instruction count) - Physical SLOC (non-blank, non-comment lines) Assembly has distinct data and code sections (.data/.bss vs .text). """ import re from pathlib import Path from typing import Dict, List class UCCAssemblyCounter: """UCC-compatible counter for Assembly files.""" # Assembly comment markers (auto-detected) COMMENT_MARKERS = ['#', ';', '|'] # Data section markers DATA_SECTION_MARKERS = [ '.data', '.bss', '.const', '.rdata', '.sdata', '.kdata', '.sbss', '.lit', 'section .data', 'section .bss' ] # Code/text section markers TEXT_SECTION_MARKERS = [ '.text', '.code', 'section .text', 'section .txt', '.init', '.fini', '.ktext' ] # Directives (assembler commands starting with .) DIRECTIVE_PREFIXES = ['.', '%'] def __init__(self): self.results = { 'comment_whole': 0, 'comment_embedded': 0, 'compiler_directives': 0, 'data_declarations': 0, 'exec_instructions': 0, 'logical_sloc': 0, 'physical_sloc': 0, 'blank_lines': 0, } self.detected_comment_marker = None def analyze_file(self, file_path: Path) -> Dict[str, int]: """Analyze an Assembly file using UCC algorithms.""" try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: lines = f.readlines() except Exception: return self.results.copy() # Step 1: Count blank lines BEFORE processing self._count_blank_lines(lines) # Step 2: Detect comment marker used in this file self._detect_comment_marker(lines) # Step 3: Count and remove comments processed_lines = self._count_and_remove_comments(lines) # Step 4: Process assembly-specific logic (sections, directives, instructions) self._process_assembly_logic(processed_lines, lines) return self.results.copy() def _count_blank_lines(self, lines: List[str]) -> None: """Count blank lines before processing.""" for line in lines: if not line.strip(): self.results['blank_lines'] += 1 def _detect_comment_marker(self, lines: List[str]) -> None: """ Detect which comment marker (;, #, |) is used in this file. Assembly supports multiple comment styles. """ for line in lines: stripped = line.strip() if not stripped: continue for marker in self.COMMENT_MARKERS: idx = stripped.find(marker) if idx == 0: # Marker at start of line self.detected_comment_marker = marker return elif idx > 0 and stripped[idx - 1] == ' ': # Marker after space (inline comment) self.detected_comment_marker = marker return # Default to semicolon if no marker detected self.detected_comment_marker = ';' def _count_and_remove_comments(self, lines: List[str]) -> List[str]: """Count whole and embedded comments, then remove them.""" result = [] in_block = False for line in lines: stripped = line.strip() if not stripped: result.append('') continue # Handle block comments /* */ if in_block: self.results['comment_whole'] += 1 if '*/' in line: idx = line.find('*/') after = line[idx + 2:].strip() if after: result.append(after) else: result.append('') in_block = False else: result.append('') continue # Check for block comment start if '/*' in line: start_idx = line.find('/*') before = line[:start_idx].strip() end_idx = line.find('*/', start_idx) if end_idx != -1: # Block comment ends on same line after = line[end_idx + 2:].strip() combined = (before + ' ' + after).strip() if combined: self.results['comment_embedded'] += 1 result.append(combined) else: self.results['comment_whole'] += 1 result.append('') else: # Multi-line block starts in_block = True if before: self.results['comment_embedded'] += 1 result.append(before) else: self.results['comment_whole'] += 1 result.append('') continue # Handle line comments (detected marker) if self.detected_comment_marker and self.detected_comment_marker in line: idx = line.find(self.detected_comment_marker) before = line[:idx].strip() if before: self.results['comment_embedded'] += 1 result.append(before) else: self.results['comment_whole'] += 1 result.append('') else: result.append(line) return result def _process_assembly_logic(self, processed: List[str], original: List[str]) -> None: """ Process assembly-specific logic: - Track .data/.bss vs .text sections - Count directives (lines starting with . or %) - Count data declarations vs exec instructions - Handle labels (label: instruction) - Handle continuation lines (ending with \\) """ is_data_section = False # True if in .data/.bss section accumulated = '' continuation = False for proc_line, orig_line in zip(processed, original): stripped = proc_line.strip() if not stripped: continue # Check for line continuation (ends with \) if stripped.endswith('\\'): accumulated += ' ' + stripped[:-1] continuation = True continue # Complete the statement if continuation: stripped = (accumulated + ' ' + stripped).strip() accumulated = '' continuation = False # This is a physical SLOC self.results['physical_sloc'] += 1 # Check for section switch lower = stripped.lower() section_changed = False for marker in self.DATA_SECTION_MARKERS: if lower.startswith(marker): is_data_section = True section_changed = True # Section declaration itself is not data, but counted as exec self.results['exec_instructions'] += 1 self.results['logical_sloc'] += 1 break if not section_changed: for marker in self.TEXT_SECTION_MARKERS: if lower.startswith(marker): is_data_section = False section_changed = True # Section declaration counted as exec self.results['exec_instructions'] += 1 self.results['logical_sloc'] += 1 break if section_changed: continue # Check for directives (start with . or %) if any(stripped.startswith(prefix) for prefix in self.DIRECTIVE_PREFIXES): # Skip 'end' directives (endm, endif, etc.) if stripped.lower().startswith(('.end', 'end', '%end')): continue self.results['compiler_directives'] += 1 self.results['logical_sloc'] += 1 continue # Check for label-only lines (label: with no instruction) if stripped.endswith(':'): continue # Don't count label-only lines # Split by statement separator (;) statements = [s.strip() for s in stripped.split(';') if s.strip()] for statement in statements: if not statement: continue # Skip labels within statement if ':' in statement: parts = statement.split(':', 1) if len(parts) > 1 and parts[1].strip(): statement = parts[1].strip() else: continue # Label only # Count as data or exec based on current section if is_data_section: self.results['data_declarations'] += 1 else: self.results['exec_instructions'] += 1 self.results['logical_sloc'] += 1 # Handle incomplete statement at EOF if accumulated.strip(): if is_data_section: self.results['data_declarations'] += 1 else: self.results['exec_instructions'] += 1 self.results['logical_sloc'] += 1