# SXXXXXXX_PyUCC/tools/diagnose_pygount.py
#
# 280 lines
# 8.6 KiB
# Python

"""Run pygount on two directories and compare per-file counting results.
Usage:
python tools/diagnose_pygount.py <dirA> <dirB> [--ext .py,.txt]
Output: lists files with differing counting values or missing files.
"""
import argparse
import json
import subprocess
from pathlib import Path
from typing import Dict, Any, List
import sys
def collect_files(directory: Path, exts=None) -> List[Path]:
    """Recursively gather the regular files found below *directory*.

    Args:
        directory: Root of the tree to walk.
        exts: Optional iterable of suffix strings. When it is non-empty, a
            file is kept only if its lower-cased path ends with one of the
            suffixes. The suffixes themselves are compared as given, so the
            caller is expected to pass them in lower case.

    Returns:
        The matching paths, in the order ``Path.rglob`` yields them.
    """
    suffixes = tuple(exts) if exts else ()

    def wanted(candidate: Path) -> bool:
        # Directories and other non-file entries are never reported.
        if not candidate.is_file():
            return False
        # No filter requested: every regular file qualifies.
        if not suffixes:
            return True
        return str(candidate).lower().endswith(suffixes)

    return [entry for entry in directory.rglob("*") if wanted(entry)]
def _parse_pygount_stdout(stdout: str) -> List[Dict[str, Any]]:
    """Convert the text pygount wrote to stdout into a list of records."""
    text = (stdout or "").strip()
    if not text:
        return []

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Not one JSON document: try newline-separated JSON objects instead.
        parsed = []
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                parsed.append(json.loads(line))
            except json.JSONDecodeError:
                # Ignore lines that are not JSON (e.g. stray log output).
                continue

    # A top-level object carries the per-file records under its "files" key.
    # Always hand back a list so callers can iterate over records safely,
    # even when that key is missing or empty.
    if isinstance(parsed, dict):
        records = parsed.get("files")
        return records if isinstance(records, list) else []
    return parsed if isinstance(parsed, list) else []


def run_pygount_on_files(files: List[Path]) -> List[Dict[str, Any]]:
    """Run ``pygount --format json`` on *files* and return the parsed records.

    Args:
        files: Paths handed to pygount on its command line. An empty list
            short-circuits without starting a process.

    Returns:
        A list of the JSON values pygount reported (normally one dict per
        file). The list is empty when there is no parsable output.

    Notes:
        A non-zero exit status is reported on stdout but does not abort:
        whatever pygount printed is still parsed. If the ``pygount``
        executable cannot be started, the exception raised by
        ``subprocess.run`` propagates to the caller.
    """
    if not files:
        return []

    cmd = ["pygount", "--format", "json"] + [str(p) for p in files]
    proc = subprocess.run(cmd, check=False, capture_output=True, text=True)
    if proc.returncode != 0:
        print(f"pygount failed for {len(files)} files: returncode={proc.returncode}")
        print("stderr:\n", proc.stderr)

    return _parse_pygount_stdout(proc.stdout)
# Candidate keys for each counter, in priority order. The first key whose
# value is not None wins, so an explicit 0 is respected.
_PHYSICAL_KEYS = (
    "lineCount",
    "raw_total_lines",
    "n_lines",
    "lines",
    "raw_lines",
    "line_count",
)
_CODE_KEYS = (
    "sourceCount",
    "codeCount",
    "code",
    "n_code",
    "n_code_lines",
    "code_lines",
)
_COMMENT_KEYS = (
    "documentationCount",
    "comment",
    "n_comment",
    "n_comment_lines",
    "comment_lines",
)
_BLANK_KEYS = (
    "emptyCount",
    "blank",
    "n_blank",
    "blank_lines",
    "empty_count",
)


def _first_present(item: Dict[str, Any], keys, default: Any = 0) -> Any:
    """Return the value of the first key in *keys* that is not None."""
    for key in keys:
        value = item.get(key)
        if value is not None:
            return value
    return default


def map_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Normalise one pygount JSON record into a fixed set of fields.

    Similar mapping as in countings_impl._map_pygount_json_item.

    Args:
        item: A per-file record. Several alternative key spellings are
            accepted for every field; missing counters default to 0.

    Returns:
        A dict with the keys ``file`` (path with forward slashes),
        ``physical_lines``, ``code_lines``, ``comment_lines``,
        ``blank_lines`` (all converted with ``int``) and ``language``
        (``"unknown"`` when no language key holds a truthy value).
    """
    language = (
        item.get("language")
        or item.get("languageName")
        or item.get("lang")
        or "unknown"
    )
    file_path = (
        item.get("filename")
        or item.get("file")
        or item.get("path")
        or item.get("name")
        or ""
    )
    return {
        # Replace every single backslash so Windows paths compare equal to
        # POSIX-style ones.
        "file": str(file_path).replace("\\", "/"),
        "physical_lines": int(_first_present(item, _PHYSICAL_KEYS)),
        "code_lines": int(_first_present(item, _CODE_KEYS)),
        "comment_lines": int(_first_present(item, _COMMENT_KEYS)),
        "blank_lines": int(_first_present(item, _BLANK_KEYS)),
        "language": language,
    }
def _relative_key(fname: str, base: Path) -> str:
    """Return *fname* relative to *base* (forward slashes) when possible."""
    if not fname:
        return fname
    path = Path(fname)
    try:
        # Covers both absolute paths and relative paths that still carry the
        # base directory as a prefix (e.g. "dirA/pkg/x.py" with base "dirA").
        rel = path.relative_to(base)
    except ValueError:
        try:
            # Path and base may differ only in being absolute vs. relative;
            # resolving both puts them on the same footing.
            rel = path.resolve().relative_to(base.resolve())
        except (ValueError, OSError, RuntimeError):
            # Not located under base: keep the reported name unchanged.
            return fname.replace("\\", "/")
    return str(rel).replace("\\", "/")


def build_map(parsed: List[Dict[str, Any]], base: Path) -> Dict[str, Dict[str, Any]]:
    """Index normalised pygount records by their path relative to *base*.

    Args:
        parsed: Values returned by the pygount run; entries that are not
            dicts are skipped.
        base: Directory the analysed files were collected from.

    Returns:
        Mapping of base-relative path (forward slashes) to the record
        produced by ``map_item``. Paths that do not lie under *base* are
        keyed by the name pygount reported. Later records overwrite earlier
        ones that map to the same key.
    """
    result: Dict[str, Dict[str, Any]] = {}
    for item in parsed:
        if not isinstance(item, dict):
            # Skip unexpected output lines.
            continue
        mapped = map_item(item)
        result[_relative_key(mapped["file"], base)] = mapped
    return result
def compare_maps(mapA: Dict[str, Dict[str, Any]], mapB: Dict[str, Dict[str, Any]]):
    """Print a report of how two per-file count maps differ.

    The report lists how many files exist on only one side (showing at most
    20 names per side), then every common file whose line counters or
    language differ (showing at most 200 of them).

    Args:
        mapA: Records for the first directory, keyed by relative path.
        mapB: Records for the second directory, keyed the same way.
    """
    counters = ("physical_lines", "code_lines", "comment_lines", "blank_lines")

    names_a = set(mapA)
    names_b = set(mapB)
    missing_from_b = sorted(names_a - names_b)
    missing_from_a = sorted(names_b - names_a)
    shared = sorted(names_a & names_b)

    print(
        f"Files only in A: {len(missing_from_b)}; "
        f"only in B: {len(missing_from_a)}; common: {len(shared)}"
    )

    one_sided = (
        ("\nOnly in A (examples):", missing_from_b),
        ("\nOnly in B (examples):", missing_from_a),
    )
    for heading, names in one_sided:
        if not names:
            continue
        print(heading)
        for name in names[:20]:
            print(" ", name)

    # Collect every shared file whose counters or detected language disagree.
    mismatches = []
    for name in shared:
        left = mapA[name]
        right = mapB[name]
        changed = [c for c in counters if left.get(c) != right.get(c)]
        language_changed = left.get("language") != right.get("language")
        if changed or language_changed:
            mismatches.append((name, left, right, changed, language_changed))

    print(f"Found {len(mismatches)} differing files (by counting differences)")

    if not mismatches:
        print(
            "No counting differences detected (pygount results matched for all common files)."
        )
        return

    for name, left, right, changed, language_changed in mismatches[:200]:
        print("\n--", name)
        for counter in counters:
            print(f" {counter}: A={left.get(counter)} B={right.get(counter)}")
        if language_changed:
            print(f" language: A={left.get('language')} B={right.get('language')}")
        if changed:
            print(" diff fields:", changed)
def main(argv=None):
    """Command-line entry point: compare pygount results of two directories.

    Args:
        argv: Optional list of argument strings. When omitted (the default)
            the arguments are taken from ``sys.argv`` by ``argparse``.

    Exits with status 2 when either positional argument is not an existing
    directory. Otherwise collects the files of both trees (optionally
    filtered by ``--ext``), runs pygount on each set and prints the
    comparison.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dirA")
    parser.add_argument("dirB")
    parser.add_argument(
        "--ext",
        default=None,
        help="Comma-separated extensions to check, e.g. .py,.txt "
        "(only matching files are passed to pygount)",
    )
    args = parser.parse_args(argv)

    a = Path(args.dirA)
    b = Path(args.dirB)
    # A regular file would pass an exists() check but yields no files when
    # walked, leading to a misleading "no differences" report.
    if not a.is_dir() or not b.is_dir():
        print("One of the provided dirs does not exist or is not a directory:", a, b)
        sys.exit(2)

    exts = None
    if args.ext:
        # Lower-case the suffixes because file names are compared lower-cased.
        exts = [e.strip().lower() for e in args.ext.split(",") if e.strip()]

    filesA = collect_files(a, exts)
    filesB = collect_files(b, exts)
    parsedA = run_pygount_on_files(filesA)
    parsedB = run_pygount_on_files(filesB)
    mapA = build_map(parsedA, a)
    mapB = build_map(parsedB, b)
    compare_maps(mapA, mapB)
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()