275 lines
10 KiB
Python
275 lines
10 KiB
Python
"""
|
|
UCC-compatible counter for Java files.
|
|
|
|
Implements UCC algorithms for Java with the following metrics:
|
|
- Comment Whole Lines (/* */, //, /** */)
|
|
- Comment Embedded Lines
|
|
- Compiler Directives (import/package)
|
|
- Data Declarations (class, interface, variable declarations)
|
|
- Exec Instructions (executable statements)
|
|
- Logical SLOC (statement count)
|
|
- Physical SLOC (non-blank, non-comment lines)
|
|
|
|
Java follows C-style syntax but with Java-specific keywords.
|
|
"""
|
|
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List
|
|
|
|
|
|
class UCCJavaCounter:
    """UCC-compatible counter for Java files.

    Counters live in ``self.results`` and are *added to* by every
    ``analyze_file`` call; nothing resets them, so use a fresh instance
    when per-file numbers are required.

    Known limitations: literals and directives are recognised one physical
    line at a time, so multi-line text blocks and directives split across
    lines are not handled specially.
    """

    # Java directives
    DIRECTIVES = {'import', 'package'}

    # Java data declaration keywords
    DATA_KEYWORDS = {
        'abstract', 'ArrayList', 'boolean', 'byte', 'char', 'class',
        'double', 'extends', 'float', 'HashMap', 'HashSet', 'implements',
        'int', 'interface', 'LinkedHashMap', 'LinkedList', 'long',
        'native', 'private', 'protected', 'public', 'short', 'static',
        'String', 'TreeMap', 'Vector', 'void', 'volatile', 'enum',
        'final', 'transient', 'synchronized'
    }

    # Java exec keywords
    EXEC_KEYWORDS = {
        'break', 'case', 'catch', 'continue', 'default', 'do', 'else',
        'finally', 'for', 'if', 'new', 'return', 'super', 'switch',
        'this', 'throw', 'throws', 'try', 'while', 'instanceof', 'assert'
    }

    def __init__(self):
        """Create a counter with every metric at zero."""
        self.results = {
            'comment_whole': 0,
            'comment_embedded': 0,
            'compiler_directives': 0,
            'data_declarations': 0,
            'exec_instructions': 0,
            'logical_sloc': 0,
            'physical_sloc': 0,
            'blank_lines': 0,
        }

    def analyze_file(self, file_path: Path) -> Dict[str, int]:
        """Analyze a Java file using UCC algorithms.

        Args:
            file_path: Location of the Java source file. It is read as
                UTF-8 with undecodable bytes dropped.

        Returns:
            A copy of ``self.results`` after this file's counts were added.
            If the file cannot be opened or read, the counters are left
            untouched and the current values are returned.
        """
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        except (OSError, ValueError):
            # Best effort: an unreadable file contributes nothing.
            return self.results.copy()

        # Step 1: Count blank lines BEFORE processing
        self._count_blank_lines(lines)

        # Step 2: Remove string literals (keep structure)
        processed_lines = self._remove_strings(lines)

        # Step 3: Count and remove comments
        processed_lines = self._count_and_remove_comments(processed_lines, lines)

        # Step 4: Count directives
        processed_lines = self._count_directives(processed_lines, lines)

        # Step 5: Count logical SLOC and classify data/exec
        self._count_logical_sloc(processed_lines, lines)

        return self.results.copy()

    def _count_blank_lines(self, lines: List[str]) -> None:
        """Add the number of whitespace-only lines to ``blank_lines``."""
        self.results['blank_lines'] += sum(1 for line in lines if not line.strip())

    def _remove_strings(self, lines: List[str]) -> List[str]:
        """Mask the contents of string and char literals with ``$``.

        The quotes themselves and the length of every line are preserved.
        Comment text is passed through untouched and quotes inside comments
        are ignored, so a quote in a comment can never hide a ``*/`` or
        real code. Literals that are not closed on the same line are left
        as they are.

        Args:
            lines: Raw source lines.

        Returns:
            One masked line per input line.
        """
        result = []
        in_block = False  # block-comment state carried from line to line
        for line in lines:
            masked, in_block = self._mask_line(line, in_block)
            result.append(masked)
        return result

    def _mask_line(self, line: str, in_block: bool) -> tuple:
        """Mask literals on one line; return ``(masked_line, in_block)``."""
        pieces = []
        pos = 0
        end = len(line)
        while pos < end:
            if in_block:
                close = line.find('*/', pos)
                if close == -1:
                    pieces.append(line[pos:])
                    break
                pieces.append(line[pos:close + 2])
                pos = close + 2
                in_block = False
                continue
            if line.startswith('//', pos):
                # Rest of the line is a comment: copy verbatim.
                pieces.append(line[pos:])
                break
            if line.startswith('/*', pos):
                pieces.append('/*')
                pos += 2
                in_block = True
                continue
            ch = line[pos]
            if ch in ('"', "'"):
                close = self._find_closing_quote(line, pos)
                if close != -1:
                    pieces.append(ch + '$' * (close - pos - 1) + ch)
                    pos = close + 1
                    continue
            pieces.append(ch)
            pos += 1
        return ''.join(pieces), in_block

    @staticmethod
    def _find_closing_quote(line: str, start: int) -> int:
        """Return the index of the quote closing the literal at ``start``.

        Backslash escapes are skipped. Returns -1 when the literal is not
        terminated on this line.
        """
        quote = line[start]
        pos = start + 1
        end = len(line)
        while pos < end:
            ch = line[pos]
            if ch == quote:
                return pos
            if ch == '\\':
                if pos + 1 >= end or line[pos + 1] == '\n':
                    return -1
                pos += 2
            else:
                pos += 1
        return -1

    def _count_and_remove_comments(self, processed: List[str], original: List[str]) -> List[str]:
        """Count whole and embedded comments, then remove them.

        A non-blank line that holds only comment text adds one to
        ``comment_whole``; a line holding both code and comment text adds
        one to ``comment_embedded``. Blank lines are never counted here,
        even inside a block comment.

        Args:
            processed: Lines with literals already masked.
            original: The raw lines; only used to bound the iteration.

        Returns:
            One entry per line: the code with all comments stripped, ``''``
            for blank and comment-only lines, or the untouched line when it
            contains no comment.
        """
        result = []
        in_block = False

        for proc_line, _ in zip(processed, original):
            if not proc_line.strip():
                result.append('')
                continue

            code, saw_comment, in_block = self._split_code_and_comments(proc_line, in_block)

            if not saw_comment:
                result.append(proc_line)
            elif code:
                self.results['comment_embedded'] += 1
                result.append(code)
            else:
                self.results['comment_whole'] += 1
                result.append('')

        return result

    @staticmethod
    def _split_code_and_comments(line: str, in_block: bool) -> tuple:
        """Strip every comment from one line.

        Returns ``(code, saw_comment, in_block)`` where ``code`` is the
        remaining code fragments joined by single spaces and ``in_block``
        tells whether a block comment is still open at the end of the line.
        """
        fragments = []
        saw_comment = in_block
        pos = 0
        end = len(line)
        while pos < end:
            if in_block:
                close = line.find('*/', pos)
                if close == -1:
                    break  # the rest of the line is comment text
                pos = close + 2
                in_block = False
                continue
            block_at = line.find('/*', pos)
            line_at = line.find('//', pos)
            if block_at == -1 and line_at == -1:
                fragments.append(line[pos:])
                break
            saw_comment = True
            # Whichever marker comes first decides the comment type, so a
            # '/*' inside a '//' comment does not open a block.
            if block_at == -1 or (line_at != -1 and line_at < block_at):
                fragments.append(line[pos:line_at])
                break
            fragments.append(line[pos:block_at])
            # Resume after the opener so '/*/' is not seen as closed.
            pos = block_at + 2
            in_block = True
        code = ' '.join(part.strip() for part in fragments if part.strip())
        return code, saw_comment, in_block

    def _count_directives(self, processed: List[str], original: List[str]) -> List[str]:
        """Count compiler directives (import/package).

        A line is a directive when its first whitespace-separated token is
        in ``DIRECTIVES``. Directive lines are blanked in the returned list
        so later passes do not count them again.
        """
        result = []

        for proc_line, _ in zip(processed, original):
            tokens = proc_line.split()
            if not tokens:
                result.append('')
            elif tokens[0] in self.DIRECTIVES:
                self.results['compiler_directives'] += 1
                result.append('')
            else:
                result.append(proc_line)

        return result

    def _count_logical_sloc(self, processed: List[str], original: List[str]) -> None:
        """Count logical SLOC and classify as data or exec.

        Every non-blank line adds one to ``physical_sloc``. Lines are
        accumulated until one contains ``;``, ``{`` or ``}`` while no
        parenthesis or bracket is open; the accumulated text then adds
        ``max(1, semicolons + open braces)`` to ``logical_sloc`` and at
        most one to either ``data_declarations`` or ``exec_instructions``.
        Text left over at the end counts as one more logical statement.

        Args:
            processed: Lines with literals masked and comments and
                directives removed.
            original: The raw lines; only used to bound the iteration.
        """
        pending = []
        paren_count = 0
        bracket_count = 0

        for proc_line, _ in zip(processed, original):
            stripped = proc_line.strip()
            if not stripped:
                continue

            # This is a physical SLOC
            self.results['physical_sloc'] += 1

            # Clamp at zero: a stray closer must not keep the depth negative
            # forever and stop every later statement from completing.
            paren_count = max(0, paren_count + stripped.count('(') - stripped.count(')'))
            bracket_count = max(0, bracket_count + stripped.count('[') - stripped.count(']'))

            pending.append(stripped)

            if paren_count or bracket_count:
                continue
            if not (';' in stripped or '{' in stripped or '}' in stripped):
                continue

            statement = ' '.join(pending)
            pending = []
            self._classify(statement)
            self.results['logical_sloc'] += max(1, statement.count(';') + statement.count('{'))

        # Handle incomplete statement at EOF
        if pending:
            self._classify(' '.join(pending))
            self.results['logical_sloc'] += 1

    def _classify(self, statement: str) -> None:
        """Add one to the data or exec counter; data takes precedence."""
        if self._is_data_declaration(statement):
            self.results['data_declarations'] += 1
        elif self._is_exec_instruction(statement):
            self.results['exec_instructions'] += 1

    @staticmethod
    def _contains_keyword(statement_lower: str, keywords) -> bool:
        """Tell whether any keyword occurs as a whole word (case-insensitive)."""
        return any(
            re.search(r'\b' + re.escape(keyword.lower()) + r'\b', statement_lower)
            for keyword in keywords
        )

    def _is_data_declaration(self, statement: str) -> bool:
        """Check if statement is a data declaration.

        True when any entry of ``DATA_KEYWORDS`` appears as a whole word;
        the comparison ignores case.
        """
        return self._contains_keyword(statement.lower(), self.DATA_KEYWORDS)

    def _is_exec_instruction(self, statement: str) -> bool:
        """Check if statement contains executable keywords.

        True when an ``EXEC_KEYWORDS`` entry appears as a whole word
        (ignoring case), when the statement has a ``(`` and mentions
        neither ``class`` nor ``interface``, or when it has ``=`` but
        no ``==``.
        """
        statement_lower = statement.lower()

        # Check for exec keywords
        if self._contains_keyword(statement_lower, self.EXEC_KEYWORDS):
            return True

        # Check for method calls (contains '(')
        if '(' in statement and not any(kw in statement_lower for kw in ['class', 'interface']):
            return True

        # Check for assignment (contains '=')
        return '=' in statement and '==' not in statement
|