SXXXXXXX_PyUCC/pyucc/core/ucc_assembly_counter.py

282 lines
10 KiB
Python

"""
UCC-compatible counter for Assembly files.
Implements UCC algorithms for Assembly with the following metrics:
- Comment Whole Lines (;, #, |, /* */)
- Comment Embedded Lines
- Compiler Directives (assembler directives starting with . or %)
- Data Declarations (in .data, .bss sections)
- Exec Instructions (in .text sections)
- Logical SLOC (instruction count)
- Physical SLOC (non-blank, non-comment lines)
Assembly has distinct data and code sections (.data/.bss vs .text).
"""
import re
from pathlib import Path
from typing import Dict, List
class UCCAssemblyCounter:
    """UCC-compatible counter for Assembly files.

    Usage: create an instance and call :meth:`analyze_file`; it returns a
    dictionary with the keys ``comment_whole``, ``comment_embedded``,
    ``compiler_directives``, ``data_declarations``, ``exec_instructions``,
    ``logical_sloc``, ``physical_sloc`` and ``blank_lines``.

    The counts of the most recent analysis are also kept in ``self.results``
    and the line-comment marker chosen for that file in
    ``self.detected_comment_marker``.
    """

    # Assembly line-comment markers, tried in this order during detection.
    COMMENT_MARKERS = ['#', ';', '|']

    # Prefixes (compared case-insensitively) that switch to a data section.
    DATA_SECTION_MARKERS = [
        '.data', '.bss', '.const', '.rdata', '.sdata', '.kdata',
        '.sbss', '.lit', 'section .data', 'section .bss'
    ]

    # Prefixes (compared case-insensitively) that switch to a code section.
    TEXT_SECTION_MARKERS = [
        '.text', '.code', 'section .text', 'section .txt',
        '.init', '.fini', '.ktext'
    ]

    # A statement starting with one of these is an assembler directive.
    DIRECTIVE_PREFIXES = ['.', '%']

    def __init__(self):
        self.results = self._empty_results()
        self.detected_comment_marker = None

    @staticmethod
    def _empty_results() -> Dict[str, int]:
        """Return a fresh result dictionary with every counter at zero."""
        return {
            'comment_whole': 0,
            'comment_embedded': 0,
            'compiler_directives': 0,
            'data_declarations': 0,
            'exec_instructions': 0,
            'logical_sloc': 0,
            'physical_sloc': 0,
            'blank_lines': 0,
        }

    def analyze_file(self, file_path: Path) -> Dict[str, int]:
        """Analyze an Assembly file using UCC algorithms.

        Args:
            file_path: Path of the file to read (decoded as UTF-8, undecodable
                bytes are ignored).

        Returns:
            A copy of the counters for this file. If the file cannot be
            opened or read, all counters are zero.
        """
        # Start from a clean slate so that reusing one instance for several
        # files does not add the counts of earlier files to this one.
        self.results = self._empty_results()
        self.detected_comment_marker = None

        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        except (OSError, ValueError, TypeError):
            # Best effort: an unreadable or invalid path yields empty counts.
            return self.results.copy()

        # Step 1: count blank lines BEFORE any processing.
        self._count_blank_lines(lines)
        # Step 2: detect the line-comment marker used in this file.
        self._detect_comment_marker(lines)
        # Step 3: count comments and strip them from the lines.
        processed_lines = self._count_and_remove_comments(lines)
        # Step 4: sections, directives, data declarations and instructions.
        self._process_assembly_logic(processed_lines, lines)
        return self.results.copy()

    def _count_blank_lines(self, lines: List[str]) -> None:
        """Add the number of whitespace-only lines to ``blank_lines``."""
        self.results['blank_lines'] += sum(1 for line in lines if not line.strip())

    def _detect_comment_marker(self, lines: List[str]) -> None:
        """Pick the line-comment marker (``#``, ``;`` or ``|``) for this file.

        The first non-blank line on which the first occurrence of a marker is
        either at the start of the line or directly preceded by a space
        decides. Markers are tried in ``COMMENT_MARKERS`` order on each line.
        If nothing matches, ``;`` is used.
        """
        for line in lines:
            stripped = line.strip()
            if not stripped:
                continue
            for marker in self.COMMENT_MARKERS:
                idx = stripped.find(marker)
                starts_line = idx == 0
                follows_space = idx > 0 and stripped[idx - 1] == ' '
                if starts_line or follows_space:
                    self.detected_comment_marker = marker
                    return
        # Default to semicolon if no marker was detected.
        self.detected_comment_marker = ';'

    def _count_and_remove_comments(self, lines: List[str]) -> List[str]:
        """Count whole-line and embedded comments and strip them.

        Handles ``/* ... */`` block comments (possibly spanning lines) and
        line comments introduced by ``self.detected_comment_marker``.

        Args:
            lines: Raw lines of the file.

        Returns:
            A list with exactly one entry per input line: the code left on
            that line, or ``''`` when the line is blank or comment-only.
        """
        result = []
        in_block = False
        marker = self.detected_comment_marker

        for line in lines:
            if not line.strip():
                result.append('')
                continue

            # Inside a multi-line /* ... */ comment.
            if in_block:
                # NOTE(review): the closing line is counted as a whole
                # comment even when code follows '*/' -- confirm against UCC.
                self.results['comment_whole'] += 1
                end_idx = line.find('*/')
                if end_idx == -1:
                    result.append('')
                else:
                    # Keep whatever follows the end of the comment.
                    result.append(line[end_idx + 2:].strip())
                    in_block = False
                continue

            # A block comment starts on this line.
            start_idx = line.find('/*')
            if start_idx != -1:
                code = line[:start_idx].strip()
                end_idx = line.find('*/', start_idx)
                if end_idx != -1:
                    # Comment closes on the same line: join the code around it.
                    after = line[end_idx + 2:].strip()
                    code = (code + ' ' + after).strip()
                else:
                    in_block = True
                key = 'comment_embedded' if code else 'comment_whole'
                self.results[key] += 1
                result.append(code)
                continue

            # Line comment with the detected marker.
            if marker and marker in line:
                code = line[:line.find(marker)].strip()
                key = 'comment_embedded' if code else 'comment_whole'
                self.results[key] += 1
                result.append(code)
            else:
                result.append(line)

        return result

    def _count_statement(self, is_data_section: bool) -> None:
        """Count one logical statement as data or exec for the current section."""
        if is_data_section:
            self.results['data_declarations'] += 1
        else:
            self.results['exec_instructions'] += 1
        self.results['logical_sloc'] += 1

    def _process_assembly_logic(self, processed: List[str], original: List[str]) -> None:
        """Classify comment-stripped lines and update the SLOC counters.

        - Every non-blank processed line is one physical SLOC, including
          lines that end with a ``\\`` continuation.
        - Lines joined by ``\\`` form a single logical statement.
        - Section markers switch between data and code mode; the marker line
          itself is counted as one exec instruction.
        - Lines starting with ``.`` or ``%`` are compiler directives, except
          ``.end*`` / ``%end*`` which add no logical SLOC.
        - Label-only lines add no logical SLOC.
        - Remaining lines are split on ``;`` and each statement is counted as
          a data declaration or an exec instruction depending on the section.

        Args:
            processed: Lines with comments removed.
            original: Raw lines; only used to bound the iteration to the
                shorter of the two lists.
        """
        data_markers = tuple(self.DATA_SECTION_MARKERS)
        text_markers = tuple(self.TEXT_SECTION_MARKERS)
        directive_prefixes = tuple(self.DIRECTIVE_PREFIXES)

        is_data_section = False  # True while inside a .data/.bss-like section
        accumulated = ''         # text gathered from continuation lines
        continuation = False

        for proc_line, _orig_line in zip(processed, original):
            stripped = proc_line.strip()
            if not stripped:
                continue

            # Every non-blank, comment-free line is a physical SLOC. This is
            # counted before the continuation check so continued lines are
            # not lost.
            self.results['physical_sloc'] += 1

            # Line continuation: remember the fragment and wait for the rest.
            if stripped.endswith('\\'):
                accumulated += ' ' + stripped[:-1]
                continuation = True
                continue

            # Last line of a continued statement: join the fragments.
            if continuation:
                stripped = (accumulated + ' ' + stripped).strip()
                accumulated = ''
                continuation = False

            lower = stripped.lower()

            # Section switches; data markers take precedence over text ones.
            is_data_marker = lower.startswith(data_markers)
            if is_data_marker or lower.startswith(text_markers):
                is_data_section = is_data_marker
                # The section declaration itself is counted as exec.
                self.results['exec_instructions'] += 1
                self.results['logical_sloc'] += 1
                continue

            # Assembler directives (start with . or %).
            if stripped.startswith(directive_prefixes):
                # 'end' style directives (.endm, %endif, ...) are not counted.
                if not lower.startswith(('.end', '%end')):
                    self.results['compiler_directives'] += 1
                    self.results['logical_sloc'] += 1
                continue

            # Label-only line (label: with no instruction).
            if stripped.endswith(':'):
                continue

            # Several statements may share a line, separated by ';'.
            for statement in stripped.split(';'):
                statement = statement.strip()
                if not statement:
                    continue
                # "label: instr" counts once; a bare "label:" does not count.
                _label, colon, rest = statement.partition(':')
                if colon and not rest.strip():
                    continue
                self._count_statement(is_data_section)

        # A continuation left open at EOF still forms one statement.
        if accumulated.strip():
            self._count_statement(is_data_section)