SXXXXXXX_PyUCC/tools/inspect_counts.py

85 lines
2.2 KiB
Python

"""Inspect counting results and normalized hashes for two files.
Usage:
python tools/inspect_counts.py <path_to_current_file> [<path_to_baseline_file>]
Example:
python tools/inspect_counts.py pyucc/gui/gui.py baseline/pyucc__20251128T121455_local/files/pyucc/gui/gui.py
"""
from pathlib import Path
import hashlib
import sys
import json
def normalize_bytes(b: bytes) -> bytes:
    """Return *b* without a leading UTF-8 BOM and with LF-only line endings.

    A UTF-8 byte-order mark at the very start of the data is dropped, then
    every CRLF pair and every remaining lone CR is rewritten as a single LF.
    """
    utf8_bom = b"\xef\xbb\xbf"
    payload = b[len(utf8_bom):] if b.startswith(utf8_bom) else b
    # CRLF must be collapsed before lone CR, otherwise each CRLF would
    # turn into two line feeds.
    return payload.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
def md5(b: bytes) -> str:
    """Return the MD5 digest of *b* as a lowercase hexadecimal string."""
    hasher = hashlib.md5()
    hasher.update(b)
    return hasher.hexdigest()
def phys_lines(b: bytes) -> int:
    """Count the physical lines in *b* after newline normalization.

    Empty content has zero lines. Otherwise the result is the number of
    line feeds, plus one when the final line has no terminating line feed.
    """
    normalized = normalize_bytes(b)
    if not normalized:
        return 0
    newline_total = normalized.count(b"\n")
    if normalized.endswith(b"\n"):
        return newline_total
    # The last line is unterminated but still counts as a line.
    return newline_total + 1
def load_bytes(p: Path):
    """Read the whole file at *p* in binary mode.

    Returns the file content as bytes. If opening or reading raises any
    exception, a "Failed to read" message is printed and None is returned.
    """
    try:
        with p.open("rb") as stream:
            payload = stream.read()
    except Exception as err:
        # Best-effort tool: report the problem and let the caller skip the file.
        print(f"Failed to read {p}: {err}")
        return None
    return payload
def analyze_path(p: Path):
    """Collect size, hash and line-count facts about the file at *p*.

    Returns None when the file cannot be read. Otherwise returns a dict with
    the keys "path", "size" (raw byte length), "raw_md5" (MD5 of the raw
    bytes), "norm_md5" (MD5 of the normalized bytes) and "phys_lines".
    """
    raw = load_bytes(p)
    if raw is None:
        return None

    normalized = normalize_bytes(raw)
    raw_digest = md5(raw)
    normalized_digest = md5(normalized)
    line_total = phys_lines(raw)

    # Key order is kept stable because the dict is dumped as JSON by callers.
    return {
        "path": str(p),
        "size": len(raw),
        "raw_md5": raw_digest,
        "norm_md5": normalized_digest,
        "phys_lines": line_total,
    }
def print_countings(p: Path):
    """Print the basic file facts and, when available, the pyucc countings.

    The pyucc counting function is imported lazily; an import failure is
    reported and the countings section is skipped. The file facts from
    analyze_path are printed as indented JSON, followed by the result of
    analyze_file_counts(p). Any failure in that call or in serializing its
    result is reported rather than raised.
    """
    try:
        from pyucc.core.countings_impl import analyze_file_counts as count_fn
    except Exception as err:
        print(f"Cannot import analyze_file_counts: {err}")
        count_fn = None

    summary = analyze_path(p)
    if summary is None:
        print(f"No info for {p}")
        return
    print(json.dumps(summary, indent=2))

    if count_fn is None:
        return
    try:
        counts = count_fn(p)
        print("countings:", json.dumps(counts, indent=2))
    except Exception as err:
        print(f"analyze_file_counts failed: {err}")
def main():
    """Command-line entry point.

    Expects the current file path as the first argument and an optional
    baseline file path as the second. With no arguments, prints a usage
    line and exits with status 2.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python tools/inspect_counts.py <current_path> [<baseline_path>]")
        sys.exit(2)

    current = Path(args[0])
    baseline = Path(args[1]) if len(args) > 1 else None

    print("\n=== Current file ===")
    print_countings(current)

    if baseline is not None:
        print("\n=== Baseline file ===")
        print_countings(baseline)
# Run the command-line interface only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    main()