85 lines
2.2 KiB
Python
85 lines
2.2 KiB
Python
"""Inspect counting results and normalized hashes for two files.
|
|
|
|
Usage:
|
|
python tools/inspect_counts.py <path_to_current_file> [<path_to_baseline_file>]
|
|
|
|
Example:
|
|
python tools/inspect_counts.py pyucc/gui/gui.py baseline/pyucc__20251128T121455_local/files/pyucc/gui/gui.py
|
|
"""
|
|
from pathlib import Path
|
|
import hashlib
|
|
import sys
|
|
import json
|
|
|
|
def normalize_bytes(b: bytes) -> bytes:
    """Return *b* without a leading UTF-8 BOM and with LF-only line endings.

    A single UTF-8 byte-order mark at the very start is dropped, then every
    CRLF pair and every remaining lone CR is rewritten as LF.
    """
    bom = b"\xef\xbb\xbf"
    body = b[len(bom):] if b.startswith(bom) else b
    # CRLF has to be collapsed before lone CRs, otherwise each CRLF would
    # become two LFs.
    return body.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
|
|
|
|
def md5(b: bytes) -> str:
    """Return the MD5 digest of *b* as a hexadecimal string."""
    digest = hashlib.md5()
    digest.update(b)
    return digest.hexdigest()
|
|
|
|
def phys_lines(b: bytes) -> int:
    """Count the physical lines in *b* after newline normalization.

    Content that is empty once normalized has zero lines. Otherwise every LF
    terminates one line, and trailing bytes without a final LF count as one
    additional line.
    """
    normalized = normalize_bytes(b)
    if not normalized:
        return 0
    line_count = normalized.count(b"\n")
    if not normalized.endswith(b"\n"):
        # The last line has no terminator but is still a line.
        line_count += 1
    return line_count
|
|
|
|
def load_bytes(p: Path):
    """Read the file at *p* in binary mode and return its entire contents.

    Any exception raised while opening or reading the file is reported with
    ``print`` and ``None`` is returned instead of propagating the error.
    """
    try:
        return p.read_bytes()
    except Exception as err:
        print(f"Failed to read {p}: {err}")
    return None
|
|
|
|
def analyze_path(p: Path):
    """Collect size, hash and line-count facts about the file at *p*.

    Returns ``None`` when ``load_bytes`` could not read the file. Otherwise
    returns a dict with, in this order: ``path`` (``str(p)``), ``size`` (raw
    byte length), ``raw_md5`` (hash of the raw bytes), ``norm_md5`` (hash of
    the normalized bytes) and ``phys_lines``.
    """
    raw = load_bytes(p)
    if raw is None:
        return None

    # Insertion order is kept as-is because it determines the order in which
    # the keys appear when the dict is dumped as JSON.
    summary = {"path": str(p), "size": len(raw), "raw_md5": md5(raw)}
    summary["norm_md5"] = md5(normalize_bytes(raw))
    summary["phys_lines"] = phys_lines(raw)
    return summary
|
|
|
|
def print_countings(p: Path):
    """Print the basic file facts for *p*, followed by its counting results.

    The facts from ``analyze_path`` are printed as indented JSON. If
    ``analyze_file_counts`` can be imported from ``pyucc.core.countings_impl``
    its result for *p* is printed as well; import failures and failures while
    computing or serializing the counts are reported and otherwise ignored.
    """
    # Imported lazily so the hash/line facts can still be shown when pyucc
    # itself is not importable.
    try:
        from pyucc.core.countings_impl import analyze_file_counts
    except Exception as import_err:
        print(f"Cannot import analyze_file_counts: {import_err}")
        analyze_file_counts = None

    facts = analyze_path(p)
    if facts is None:
        print(f"No info for {p}")
        return
    print(json.dumps(facts, indent=2))

    if analyze_file_counts is None:
        return
    try:
        counts = analyze_file_counts(p)
        print("countings:", json.dumps(counts, indent=2))
    except Exception as count_err:
        print(f"analyze_file_counts failed: {count_err}")
|
|
|
|
def main():
    """Command-line entry point: inspect the current file and, if given, a baseline.

    The first argument is the current file and is required; without it a usage
    message is printed and the process exits with status 2. The second
    argument is an optional baseline file. Further arguments are ignored.
    """
    args = sys.argv[1:]
    if not args:
        print("Usage: python tools/inspect_counts.py <current_path> [<baseline_path>]")
        sys.exit(2)

    cur = Path(args[0])
    base = Path(args[1]) if len(args) > 1 else None

    print("\n=== Current file ===")
    print_countings(cur)
    if base is not None:
        print("\n=== Baseline file ===")
        print_countings(base)
|
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|