"""Inspect counting results and normalized hashes for two files.

Usage:
    python tools/inspect_counts.py <file> [<baseline_file>]

Example:
    python tools/inspect_counts.py pyucc/gui/gui.py baseline/pyucc__20251128T121455_local/files/pyucc/gui/gui.py
"""

from pathlib import Path
import hashlib
import sys
import json
from typing import Any, Dict, Optional

# UTF-8 byte order mark, stripped before hashing/counting normalized content.
_UTF8_BOM = b"\xef\xbb\xbf"


def normalize_bytes(b: bytes) -> bytes:
    """Return *b* without a leading UTF-8 BOM and with LF-only line endings.

    One leading BOM is removed (if present), then every CRLF and every
    remaining lone CR is converted to LF.
    """
    if b.startswith(_UTF8_BOM):
        b = b[len(_UTF8_BOM):]
    # CRLF must be handled first so that it does not become two newlines.
    b = b.replace(b"\r\n", b"\n")
    b = b.replace(b"\r", b"\n")
    return b


def md5(b: bytes) -> str:
    """Return the hexadecimal MD5 digest of *b* (used as a content fingerprint)."""
    return hashlib.md5(b).hexdigest()


def phys_lines(b: bytes) -> int:
    """Return the number of physical lines in *b* after normalization.

    Empty (normalized) content has 0 lines; a final line without a trailing
    newline still counts as a line.
    """
    nb = normalize_bytes(b)
    if not nb:
        return 0
    return nb.count(b"\n") + (0 if nb.endswith(b"\n") else 1)


def load_bytes(p: Path) -> Optional[bytes]:
    """Read the file at *p* in binary mode.

    Returns the raw bytes, or ``None`` (after printing a message) when the
    file cannot be opened or read.
    """
    try:
        with p.open("rb") as fh:
            return fh.read()
    except (OSError, ValueError) as e:
        # OSError: missing file, permissions, directory, I/O failure.
        # ValueError: malformed path (e.g. embedded NUL byte).
        print(f"Failed to read {p}: {e}")
        return None


def analyze_path(p: Path) -> Optional[Dict[str, Any]]:
    """Collect size, hashes and physical line count for the file at *p*.

    Returns a dict with keys ``path``, ``size``, ``raw_md5``, ``norm_md5``
    and ``phys_lines``, or ``None`` when the file cannot be read.
    """
    b = load_bytes(p)
    if b is None:
        return None
    nb = normalize_bytes(b)
    return {
        "path": str(p),
        "size": len(b),
        "raw_md5": md5(b),
        "norm_md5": md5(nb),
        # Deliberately computed from the raw bytes: normalizing twice is not
        # idempotent for input that starts with more than one BOM.
        "phys_lines": phys_lines(b),
    }


def print_countings(p: Path) -> None:
    """Print file info for *p* as JSON, followed by pyucc countings if available.

    The ``pyucc`` import is optional: when it fails, a message is printed and
    only the basic file info is shown.
    """
    try:
        from pyucc.core.countings_impl import analyze_file_counts
    except Exception as e:
        # Best-effort: any failure while importing the project package must
        # not prevent the basic hash/line report.
        print(f"Cannot import analyze_file_counts: {e}")
        analyze_file_counts = None

    info = analyze_path(p)
    if info is None:
        print(f"No info for {p}")
        return
    print(json.dumps(info, indent=2))

    if analyze_file_counts is not None:
        try:
            c = analyze_file_counts(p)
            print("countings:", json.dumps(c, indent=2))
        except Exception as e:
            # Covers both a failure inside analyze_file_counts and a result
            # that json.dumps cannot serialize.
            print(f"analyze_file_counts failed: {e}")


def main() -> None:
    """Command-line entry point: inspect a file and, optionally, a baseline file.

    Exits with status 2 when no file argument is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python tools/inspect_counts.py <file> [<baseline_file>]")
        sys.exit(2)

    cur = Path(sys.argv[1])
    base = Path(sys.argv[2]) if len(sys.argv) > 2 else None

    print("\n=== Current file ===")
    print_countings(cur)

    if base is not None:
        print("\n=== Baseline file ===")
        print_countings(base)


if __name__ == "__main__":
    main()