SXXXXXXX_PyUCC/tools/diagnose_pygount.py

229 lines
7.4 KiB
Python

"""Run pygount on two directories and compare per-file counting results.

Usage:
    python tools/diagnose_pygount.py <dirA> <dirB> [--ext .py,.txt]

Output: lists files whose line counts or detected language differ between the
two directories, as well as files that are present in only one of them.
"""
import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional
def collect_files(directory: Path, exts: Optional[List[str]] = None) -> List[Path]:
    """Recursively collect the regular files below *directory*.

    Args:
        directory: Root of the tree to walk.
        exts: Optional filename suffixes (e.g. ``[".py", ".txt"]``). When
            given, only files whose path ends with one of them are kept.
            Matching is case-insensitive. ``None`` or an empty list keeps
            every file.

    Returns:
        The matching file paths, sorted so the result does not depend on the
        order in which the filesystem enumerates directory entries.
    """
    # The path is lower-cased before the comparison, so the suffixes must be
    # lower-cased too; otherwise a suffix such as ".PY" could never match.
    suffixes = tuple(ext.lower() for ext in exts) if exts else ()
    found = [
        path
        for path in directory.rglob("*")
        if path.is_file()
        and (not suffixes or str(path).lower().endswith(suffixes))
    ]
    return sorted(found)
def run_pygount_on_files(files: List[Path]) -> List[Dict[str, Any]]:
    """Run ``pygount --format json`` on *files* and return the parsed records.

    Args:
        files: Paths handed to pygount on its command line.

    Returns:
        A list with the JSON values pygount printed: the content of the
        top-level ``files`` entry when the output is a single object that has
        one, the object itself (wrapped in a list) when it has none, or the
        objects parsed line by line when the output is not one JSON document.
        The list is empty when *files* is empty, when pygount could not be
        started, or when nothing parsable was printed.
    """
    if not files:
        return []

    cmd = ["pygount", "--format", "json"] + [str(p) for p in files]
    try:
        proc = subprocess.run(cmd, check=False, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the executable cannot be started (e.g. it is not on
        # PATH). Report it like any other pygount failure, not as a traceback.
        print(f"pygount could not be started for {len(files)} files: {exc}")
        return []

    if proc.returncode != 0:
        # A failing run is reported but whatever was printed is still parsed.
        print(f"pygount failed for {len(files)} files: returncode={proc.returncode}")
        print("stderr:\n", proc.stderr)

    text = (proc.stdout or "").strip()
    if not text:
        return []

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Not a single JSON document: try newline-separated JSON objects and
        # skip every line that is not JSON.
        parsed = []
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                parsed.append(json.loads(line))
            except json.JSONDecodeError:
                continue

    if isinstance(parsed, dict):
        if "files" in parsed:
            # Summary document: the per-file records live under 'files'.
            records = parsed["files"]
            return records if isinstance(records, list) else []
        # A lone JSON object is treated as one record; returning the bare
        # dict would make callers iterate over its keys and lose it.
        return [parsed]
    if isinstance(parsed, list):
        return parsed
    # Scalars (number, string, null) carry no records.
    return []
# Candidate keys per counter, in lookup order (first non-None value wins).
_PHYSICAL_KEYS = ("lineCount", "raw_total_lines", "n_lines", "lines", "raw_lines", "line_count")
_CODE_KEYS = ("sourceCount", "codeCount", "code", "n_code", "n_code_lines", "code_lines")
_COMMENT_KEYS = ("documentationCount", "comment", "n_comment", "n_comment_lines", "comment_lines")
_BLANK_KEYS = ("emptyCount", "blank", "n_blank", "blank_lines", "empty_count")


def _first_present(item: Dict[str, Any], keys, default: Any = 0) -> Any:
    """Return the value of the first key in *keys* that is not None in *item*."""
    for key in keys:
        value = item.get(key)
        if value is not None:
            return value
    return default


def map_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Normalise one pygount JSON record to a fixed set of keys.

    Similar mapping as in countings_impl._map_pygount_json_item.

    Args:
        item: A JSON object describing one file. Several alternative key
            spellings are accepted for every counter; a key counts as present
            when its value is not None, so an explicit 0 is kept.

    Returns:
        A dict with the keys ``file``, ``physical_lines``, ``code_lines``,
        ``comment_lines``, ``blank_lines`` (all counters as ``int``, 0 when
        absent) and ``language`` (``"unknown"`` when absent). Backslashes in
        ``file`` are replaced by forward slashes.

    Raises:
        ValueError / TypeError: if a counter value cannot be converted by
            ``int()``.
    """
    language = item.get("language") or item.get("languageName") or item.get("lang") or "unknown"
    file_path = (
        item.get("filename")
        or item.get("file")
        or item.get("path")
        or item.get("name")
        or ""
    )
    return {
        # Replace every single backslash (Windows separator), not only
        # doubled ones, so the value uses '/' throughout.
        "file": str(file_path).replace('\\', '/'),
        "physical_lines": int(_first_present(item, _PHYSICAL_KEYS)),
        "code_lines": int(_first_present(item, _CODE_KEYS)),
        "comment_lines": int(_first_present(item, _COMMENT_KEYS)),
        "blank_lines": int(_first_present(item, _BLANK_KEYS)),
        "language": language,
    }
def _relative_key(fname: str, base: Path) -> str:
    """Return *fname* relative to *base* with '/' separators when possible."""
    if not fname:
        return fname
    path = Path(fname)
    try:
        # Lexical match first: covers absolute paths below an absolute base
        # and relative paths that start with a relative base ("dirA/x.py").
        rel = path.relative_to(base)
    except ValueError:
        try:
            # Mixed absolute/relative spellings: compare the resolved forms.
            rel = path.resolve().relative_to(base.resolve())
        except (ValueError, OSError, RuntimeError):
            # Not below base (or not resolvable): keep the reported name.
            return fname.replace('\\', '/')
    return str(rel).replace('\\', '/')


def build_map(parsed: List[Dict[str, Any]], base: Path) -> Dict[str, Dict[str, Any]]:
    """Index normalised pygount records by their path relative to *base*.

    Args:
        parsed: Records as returned by ``run_pygount_on_files``; entries that
            are not dicts are skipped.
        base: Directory the records belong to. Paths located below it are
            keyed relative to it, so the same file in two different trees
            gets the same key; other paths are keyed by the reported name.

    Returns:
        Mapping of '/'-separated path to the record produced by ``map_item``.
        When two records map to the same key the later one wins.
    """
    result: Dict[str, Dict[str, Any]] = {}
    for item in parsed:
        if not isinstance(item, dict):
            # skip unexpected output lines
            continue
        mapped = map_item(item)
        result[_relative_key(mapped['file'], base)] = mapped
    return result
def compare_maps(mapA: Dict[str, Dict[str, Any]], mapB: Dict[str, Dict[str, Any]]):
    """Print a report of the differences between two per-file result maps.

    Args:
        mapA: Records of the first tree, keyed by relative path.
        mapB: Records of the second tree, keyed the same way.

    The report lists how many files exist on one side only (with up to 20
    examples per side) and, for files present on both sides, those whose line
    counters or language differ (details for up to 200 of them). Nothing is
    returned.
    """
    count_fields = ('physical_lines', 'code_lines', 'comment_lines', 'blank_lines')

    a_only = sorted(mapA.keys() - mapB.keys())
    b_only = sorted(mapB.keys() - mapA.keys())
    shared = sorted(mapA.keys() & mapB.keys())
    print(f'Files only in A: {len(a_only)}; only in B: {len(b_only)}; common: {len(shared)}')

    one_sided = (
        ('\nOnly in A (examples):', a_only),
        ('\nOnly in B (examples):', b_only),
    )
    for heading, names in one_sided:
        if not names:
            continue
        print(heading)
        for name in names[:20]:
            print(' ', name)

    # Collect every shared file whose counters or language disagree.
    mismatches = []
    for name in shared:
        left, right = mapA[name], mapB[name]
        changed = [field for field in count_fields if left.get(field) != right.get(field)]
        language_changed = left.get('language') != right.get('language')
        if changed or language_changed:
            mismatches.append((name, left, right, changed, language_changed))

    print(f'Found {len(mismatches)} differing files (by counting differences)')
    if not mismatches:
        print('No counting differences detected (pygount results matched for all common files).')
        return

    for name, left, right, changed, language_changed in mismatches[:200]:
        print('\n--', name)
        # All counters are shown for context, not only the differing ones.
        for field in count_fields:
            print(f" {field}: A={left.get(field)} B={right.get(field)}")
        if language_changed:
            print(f" language: A={left.get('language')} B={right.get('language')}")
        if changed:
            print(' diff fields:', changed)
def main(argv: Optional[List[str]] = None) -> None:
    """Parse the command line, run pygount on both trees and print the report.

    Args:
        argv: Arguments to parse; ``None`` (the default) lets argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: with status 2 when the arguments are invalid or when
            either positional argument is not an existing directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dirA')
    parser.add_argument('dirB')
    parser.add_argument(
        '--ext',
        default=None,
        help='Comma-separated extensions to check, e.g. .py,.txt '
             '(only files with these extensions are passed to pygount)',
    )
    args = parser.parse_args(argv)

    a = Path(args.dirA)
    b = Path(args.dirB)
    # Validate both arguments before doing any work. is_dir() also rejects a
    # regular file, which exists() would accept and then scan as empty.
    for directory in (a, b):
        if not directory.is_dir():
            # parser.error() prints usage plus the message to stderr and
            # exits with status 2.
            parser.error(f'not an existing directory: {directory}')

    exts = None
    if args.ext:
        exts = [e.strip().lower() for e in args.ext.split(',') if e.strip()]

    filesA = collect_files(a, exts)
    filesB = collect_files(b, exts)
    parsedA = run_pygount_on_files(filesA)
    parsedB = run_pygount_on_files(filesB)
    mapA = build_map(parsedA, a)
    mapB = build_map(parsedB, b)
    compare_maps(mapA, mapB)


if __name__ == '__main__':
    main()