"""
UCC-compatible counter for Assembly files.

Implements UCC algorithms for Assembly with the following metrics:
- Comment Whole Lines (;, #, |, /* */)
- Comment Embedded Lines
- Compiler Directives (assembler directives starting with . or %)
- Data Declarations (in .data, .bss sections)
- Exec Instructions (in .text sections)
- Logical SLOC (instruction count)
- Physical SLOC (non-blank, non-comment lines)

Assembly has distinct data and code sections (.data/.bss vs .text).
"""

import re
from pathlib import Path
from typing import Dict, List, Optional


class UCCAssemblyCounter:
    """UCC-compatible line counter for Assembly source files.

    Typical use is ``UCCAssemblyCounter().analyze_file(path)``, which returns
    a dictionary of counters.  The same instance may be reused for several
    files; every call to :meth:`analyze_file` starts from zeroed counters.

    Attributes:
        results: Counters for the file analyzed most recently, keyed by
            ``comment_whole``, ``comment_embedded``, ``compiler_directives``,
            ``data_declarations``, ``exec_instructions``, ``logical_sloc``,
            ``physical_sloc`` and ``blank_lines``.
        detected_comment_marker: Line-comment marker chosen for the current
            file (one of ``COMMENT_MARKERS``), or ``None`` before detection.
    """

    # Candidate line-comment markers; one is auto-detected per file.
    COMMENT_MARKERS = ["#", ";", "|"]

    # Statement prefixes (compared lower-cased) that switch to a data section.
    DATA_SECTION_MARKERS = [
        ".data",
        ".bss",
        ".const",
        ".rdata",
        ".sdata",
        ".kdata",
        ".sbss",
        ".lit",
        "section .data",
        "section .bss",
    ]

    # Statement prefixes (compared lower-cased) that switch to a code section.
    TEXT_SECTION_MARKERS = [
        ".text",
        ".code",
        "section .text",
        "section .txt",
        ".init",
        ".fini",
        ".ktext",
    ]

    # A statement starting with one of these is an assembler directive.
    DIRECTIVE_PREFIXES = [".", "%"]

    def __init__(self):
        """Create a counter with all metrics at zero and no marker detected."""
        self.results = self._empty_results()
        self.detected_comment_marker: Optional[str] = None

    @staticmethod
    def _empty_results() -> Dict[str, int]:
        """Return a fresh, zeroed counter dictionary."""
        return {
            "comment_whole": 0,
            "comment_embedded": 0,
            "compiler_directives": 0,
            "data_declarations": 0,
            "exec_instructions": 0,
            "logical_sloc": 0,
            "physical_sloc": 0,
            "blank_lines": 0,
        }

    def analyze_file(self, file_path: Path) -> Dict[str, int]:
        """Analyze an Assembly file using UCC algorithms.

        Args:
            file_path: Path of the file to read.  It is decoded as UTF-8 and
                undecodable bytes are ignored.

        Returns:
            A copy of the counters for this file.  If the file cannot be
            opened or read, every counter is zero.
        """
        # Start from a clean slate so that reusing one counter for several
        # files does not add an earlier file's totals to this one.
        self.results = self._empty_results()
        self.detected_comment_marker = None

        try:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                lines = f.readlines()
        except (OSError, ValueError):
            # Best effort: an unreadable file is reported as all zeros.
            return self.results.copy()

        # Step 1: count blank lines BEFORE any text is rewritten.
        self._count_blank_lines(lines)

        # Step 2: decide which line-comment marker this file uses.
        self._detect_comment_marker(lines)

        # Step 3: count comments and strip them from the text.
        processed_lines = self._count_and_remove_comments(lines)

        # Step 4: classify what is left (sections, directives, instructions).
        self._process_assembly_logic(processed_lines, lines)

        return self.results.copy()

    def _count_blank_lines(self, lines: List[str]) -> None:
        """Add the number of empty or whitespace-only lines to ``blank_lines``."""
        self.results["blank_lines"] += sum(1 for line in lines if not line.strip())

    @staticmethod
    def _split_code_and_comment(
        line: str, in_block: bool, marker: Optional[str]
    ) -> tuple:
        """Separate the code on one physical line from its comments.

        The line is scanned left to right.  Outside a block comment,
        whichever opener comes first wins: ``/*`` starts a block comment, the
        line-comment ``marker`` discards the rest of the line.  String
        literals are not recognized, so a marker inside quotes is still
        treated as a comment opener.

        Args:
            line: The physical line to scan.
            in_block: True if a ``/* */`` comment opened on an earlier line
                is still open when this line starts.
            marker: Line-comment marker to honor, or ``None`` to look at
                block comments only.

        Returns:
            ``(code, saw_comment, in_block)``: the code fragments of the line,
            each stripped and joined by single spaces; whether any part of
            the line was comment; and whether a block comment is still open
            at the end of the line.
        """
        pieces = []
        saw_comment = in_block
        rest = line

        while rest:
            if in_block:
                saw_comment = True
                close_at = rest.find("*/")
                if close_at == -1:
                    break  # the comment runs past the end of this line
                rest = rest[close_at + 2:]
                in_block = False
                continue

            block_at = rest.find("/*")
            line_at = rest.find(marker) if marker else -1

            if line_at != -1 and (block_at == -1 or line_at < block_at):
                # The line comment starts first; a "/*" after it is just text.
                pieces.append(rest[:line_at])
                saw_comment = True
                break

            if block_at == -1:
                pieces.append(rest)
                break

            pieces.append(rest[:block_at])
            saw_comment = True
            in_block = True
            rest = rest[block_at + 2:]

        code = " ".join(piece.strip() for piece in pieces if piece.strip())
        return code, saw_comment, in_block

    def _detect_comment_marker(self, lines: List[str]) -> None:
        """Detect which comment marker (;, #, |) is used in this file.

        Text inside ``/* */`` block comments is skipped.  The first line that
        contains a usable marker decides:

        * a marker that opens the line wins outright, so ``; see issue #1``
          yields ``;`` rather than ``#``;
        * otherwise the first marker in ``COMMENT_MARKERS`` order whose first
          occurrence is preceded by whitespace (space or tab) is taken.

        If no line qualifies, the marker defaults to ``;``.
        """
        in_block = False

        for line in lines:
            code, _, in_block = self._split_code_and_comment(line, in_block, None)
            if not code:
                continue

            for marker in self.COMMENT_MARKERS:
                if code.startswith(marker):
                    self.detected_comment_marker = marker
                    return

            for marker in self.COMMENT_MARKERS:
                pos = code.find(marker)
                if pos > 0 and code[pos - 1].isspace():
                    # Marker after whitespace (trailing comment).
                    self.detected_comment_marker = marker
                    return

        # Default to semicolon if no marker was detected.
        self.detected_comment_marker = ";"

    def _count_and_remove_comments(self, lines: List[str]) -> List[str]:
        """Count whole and embedded comment lines, then remove the comments.

        A line that holds only comment text adds one to ``comment_whole``; a
        line that holds both code and comment adds one to
        ``comment_embedded``.  Blank lines are not counted here, even inside
        a block comment.

        Args:
            lines: The physical lines of the file.

        Returns:
            A list of the same length as ``lines``.  Lines without any
            comment are passed through unchanged, lines with comments are
            reduced to their code, and blank or comment-only lines become
            ``""``.
        """
        cleaned = []
        in_block = False
        marker = self.detected_comment_marker

        for line in lines:
            if not line.strip():
                cleaned.append("")
                continue

            code, saw_comment, in_block = self._split_code_and_comment(
                line, in_block, marker
            )

            if not saw_comment:
                cleaned.append(line)
            elif code:
                self.results["comment_embedded"] += 1
                cleaned.append(code)
            else:
                self.results["comment_whole"] += 1
                cleaned.append("")

        return cleaned

    def _process_assembly_logic(
        self, processed: List[str], original: List[str]
    ) -> None:
        """Classify comment-free lines and update the SLOC counters.

        * Every non-blank line is one physical SLOC, including each line of
          a statement continued with a trailing backslash.
        * A statement starting with a data/text section marker switches the
          current section and is counted as one exec instruction.
        * A statement starting with a directive prefix is counted as a
          compiler directive, except ``.end*`` / ``%end*`` forms, which are
          not counted as logical SLOC.
        * Label-only statements are not counted.
        * Anything else is split on ``;``; each piece is a data declaration
          inside a data section and an exec instruction otherwise.

        Args:
            processed: Lines with comments already removed.
            original: The untouched lines.  Only its length matters:
                iteration stops at the shorter of the two lists.
        """
        data_markers = tuple(self.DATA_SECTION_MARKERS)
        text_markers = tuple(self.TEXT_SECTION_MARKERS)
        directive_prefixes = tuple(self.DIRECTIVE_PREFIXES)

        in_data_section = False
        pending = []  # fragments of a statement continued across lines

        for proc_line, _ in zip(processed, original):
            stripped = proc_line.strip()
            if not stripped:
                continue

            # Count the physical line before looking at continuations, so
            # each line of a continued statement is included.
            self.results["physical_sloc"] += 1

            if stripped.endswith("\\"):
                pending.append(stripped[:-1])
                continue

            if pending:
                pending.append(stripped)
                stripped = " ".join(pending).strip()
                pending = []

            lowered = stripped.lower()

            # Data markers are tested first, then text markers.
            starts_data = lowered.startswith(data_markers)
            if starts_data or lowered.startswith(text_markers):
                in_data_section = starts_data
                # The section declaration itself is not data; it is counted
                # as an exec instruction.
                self.results["exec_instructions"] += 1
                self.results["logical_sloc"] += 1
                continue

            if stripped.startswith(directive_prefixes):
                # 'end' directives (endm, endif, ...) close a construct and
                # are not counted as logical SLOC.
                if not lowered.startswith((".end", "end", "%end")):
                    self.results["compiler_directives"] += 1
                    self.results["logical_sloc"] += 1
                continue

            if stripped.endswith(":"):
                continue  # label-only line

            section_key = (
                "data_declarations" if in_data_section else "exec_instructions"
            )
            for statement in stripped.split(";"):
                statement = statement.strip()
                if not statement:
                    continue

                # "label: instruction" counts once; a bare "label:" does not.
                if ":" in statement and not statement.split(":", 1)[1].strip():
                    continue

                self.results[section_key] += 1
                self.results["logical_sloc"] += 1

        # A continuation still open at end of input is one statement.
        if any(fragment.strip() for fragment in pending):
            if in_data_section:
                self.results["data_declarations"] += 1
            else:
                self.results["exec_instructions"] += 1
            self.results["logical_sloc"] += 1