""" UCC-compatible counter for Java files. Implements UCC algorithms for Java with the following metrics: - Comment Whole Lines (/* */, //, /** */) - Comment Embedded Lines - Compiler Directives (import/package) - Data Declarations (class, interface, variable declarations) - Exec Instructions (executable statements) - Logical SLOC (statement count) - Physical SLOC (non-blank, non-comment lines) Java follows C-style syntax but with Java-specific keywords. """ import re from pathlib import Path from typing import Dict, List class UCCJavaCounter: """UCC-compatible counter for Java files.""" # Java directives DIRECTIVES = {'import', 'package'} # Java data declaration keywords DATA_KEYWORDS = { 'abstract', 'ArrayList', 'boolean', 'byte', 'char', 'class', 'double', 'extends', 'float', 'HashMap', 'HashSet', 'implements', 'int', 'interface', 'LinkedHashMap', 'LinkedList', 'long', 'native', 'private', 'protected', 'public', 'short', 'static', 'String', 'TreeMap', 'Vector', 'void', 'volatile', 'enum', 'final', 'transient', 'synchronized' } # Java exec keywords EXEC_KEYWORDS = { 'break', 'case', 'catch', 'continue', 'default', 'do', 'else', 'finally', 'for', 'if', 'new', 'return', 'super', 'switch', 'this', 'throw', 'throws', 'try', 'while', 'instanceof', 'assert' } def __init__(self): self.results = { 'comment_whole': 0, 'comment_embedded': 0, 'compiler_directives': 0, 'data_declarations': 0, 'exec_instructions': 0, 'logical_sloc': 0, 'physical_sloc': 0, 'blank_lines': 0, } def analyze_file(self, file_path: Path) -> Dict[str, int]: """Analyze a Java file using UCC algorithms.""" try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: lines = f.readlines() except Exception: return self.results.copy() # Step 1: Count blank lines BEFORE processing self._count_blank_lines(lines) # Step 2: Remove string literals (keep structure) processed_lines = self._remove_strings(lines) # Step 3: Count and remove comments processed_lines = self._count_and_remove_comments(processed_lines, lines) # Step 4: Count directives processed_lines = self._count_directives(processed_lines, lines) # Step 5: Count logical SLOC and classify data/exec self._count_logical_sloc(processed_lines, lines) return self.results.copy() def _count_blank_lines(self, lines: List[str]) -> None: """Count blank lines before processing.""" for line in lines: if not line.strip(): self.results['blank_lines'] += 1 def _remove_strings(self, lines: List[str]) -> List[str]: """Remove string literal contents, keep quotes.""" result = [] for line in lines: # Replace string contents with $ processed = line # Handle escaped quotes processed = re.sub(r'"([^"\\]*(\\.[^"\\]*)*)"', lambda m: '"' + '$' * (len(m.group(0)) - 2) + '"', processed) processed = re.sub(r"'([^'\\]*(\\.[^'\\]*)*)'", lambda m: "'" + '$' * (len(m.group(0)) - 2) + "'", processed) result.append(processed) return result def _count_and_remove_comments(self, processed: List[str], original: List[str]) -> List[str]: """Count whole and embedded comments, then remove them.""" result = [] in_block = False for proc_line, orig_line in zip(processed, original): stripped = proc_line.strip() if not stripped: result.append('') continue if in_block: # Inside block comment self.results['comment_whole'] += 1 if '*/' in proc_line: idx = proc_line.find('*/') after = proc_line[idx + 2:].strip() if after: result.append(after) else: result.append('') in_block = False else: result.append('') continue # Check for block comment start if '/*' in proc_line: start_idx = proc_line.find('/*') before = proc_line[:start_idx].strip() # Check if it ends on same line end_idx = proc_line.find('*/', start_idx) if end_idx != -1: after = proc_line[end_idx + 2:].strip() combined = (before + ' ' + after).strip() if combined: self.results['comment_embedded'] += 1 result.append(combined) else: self.results['comment_whole'] += 1 result.append('') else: # Multi-line block starts in_block = True if before: self.results['comment_embedded'] += 1 result.append(before) else: self.results['comment_whole'] += 1 result.append('') continue # Check for line comment if '//' in proc_line: idx = proc_line.find('//') before = proc_line[:idx].strip() if before: self.results['comment_embedded'] += 1 result.append(before) else: self.results['comment_whole'] += 1 result.append('') else: result.append(proc_line) return result def _count_directives(self, processed: List[str], original: List[str]) -> List[str]: """Count compiler directives (import/package).""" result = [] for proc_line, orig_line in zip(processed, original): stripped = proc_line.strip() if not stripped: result.append('') continue # Check if line starts with import or package tokens = stripped.split() if tokens and tokens[0] in self.DIRECTIVES: self.results['compiler_directives'] += 1 result.append('') else: result.append(proc_line) return result def _count_logical_sloc(self, processed: List[str], original: List[str]) -> None: """Count logical SLOC and classify as data or exec.""" accumulated = '' brace_count = 0 paren_count = 0 bracket_count = 0 for proc_line, orig_line in zip(processed, original): stripped = proc_line.strip() if not stripped: continue # This is a physical SLOC self.results['physical_sloc'] += 1 # Track braces/parens/brackets brace_count += stripped.count('{') - stripped.count('}') paren_count += stripped.count('(') - stripped.count(')') bracket_count += stripped.count('[') - stripped.count(']') accumulated += ' ' + stripped # Statement complete when we hit ; or { or } and no open parens/brackets if paren_count == 0 and bracket_count == 0: if ';' in stripped or '{' in stripped or '}' in stripped: # Count statements by semicolons semicolons = accumulated.count(';') # Count braces (each { or } can be a statement) open_braces = accumulated.count('{') close_braces = accumulated.count('}') # Total logical statements num_statements = max(1, semicolons + open_braces) # Classify as data or exec if self._is_data_declaration(accumulated): self.results['data_declarations'] += 1 elif self._is_exec_instruction(accumulated): self.results['exec_instructions'] += 1 self.results['logical_sloc'] += num_statements accumulated = '' # Handle incomplete statement at EOF if accumulated.strip(): if self._is_data_declaration(accumulated): self.results['data_declarations'] += 1 elif self._is_exec_instruction(accumulated): self.results['exec_instructions'] += 1 self.results['logical_sloc'] += 1 def _is_data_declaration(self, statement: str) -> bool: """Check if statement is a data declaration.""" statement_lower = statement.lower() # Check for data keywords for keyword in self.DATA_KEYWORDS: pattern = r'\b' + re.escape(keyword.lower()) + r'\b' if re.search(pattern, statement_lower): return True return False def _is_exec_instruction(self, statement: str) -> bool: """Check if statement contains executable keywords.""" statement_lower = statement.lower() # Check for exec keywords for keyword in self.EXEC_KEYWORDS: pattern = r'\b' + re.escape(keyword.lower()) + r'\b' if re.search(pattern, statement_lower): return True # Check for method calls (contains '(') if '(' in statement and not any(kw in statement_lower for kw in ['class', 'interface']): return True # Check for assignment (contains '=') if '=' in statement and '==' not in statement: return True return False