229 lines
7.4 KiB
Python
229 lines
7.4 KiB
Python
"""Run pygount on two directories and compare per-file counting results.

Usage:
    python tools/diagnose_pygount.py <dirA> <dirB> [--ext .py,.txt]

Output: lists files with differing counting values or missing files.
"""
|
|
import argparse
|
|
import json
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List
|
|
import sys
|
|
|
|
|
|
def collect_files(directory: Path, exts=None) -> List[Path]:
    """Recursively gather the regular files found below ``directory``.

    Args:
        directory: Root folder that is walked with ``Path.rglob("*")``.
        exts: Optional collection of suffixes such as ``[".py", ".txt"]``.
            When it is ``None`` or empty every regular file is kept.
            Otherwise a file is kept only if its lower-cased path string
            ends with one of the suffixes, so the suffixes themselves are
            expected to be lower-case.

    Returns:
        The selected paths, in the order ``rglob`` produced them.
    """

    def _suffix_ok(candidate: Path) -> bool:
        # No filter configured means every file qualifies.
        if not exts:
            return True
        return any(str(candidate).lower().endswith(suffix) for suffix in exts)

    return [
        entry
        for entry in directory.rglob("*")
        if entry.is_file() and _suffix_ok(entry)
    ]
|
|
|
|
|
|
def _parse_pygount_output(text: str) -> List[Dict[str, Any]]:
    """Convert pygount's stdout into a list of per-file records."""
    text = text.strip()
    if not text:
        return []

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Not a single JSON document: treat the output as newline-separated
        # JSON objects and keep only the lines that parse.
        records = []
        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError:
                # Ignore lines that are not JSON (e.g. log noise).
                continue
        return records

    if isinstance(parsed, dict):
        if "files" in parsed:
            # Summary document: the per-file records live under 'files'.
            files = parsed["files"]
            return files if isinstance(files, list) else []
        # A lone object is what one-line newline-separated output looks like.
        return [parsed]
    if isinstance(parsed, list):
        return parsed
    # Scalars (numbers, strings, null) carry no per-file records.
    return []


def run_pygount_on_files(files: List[Path]) -> List[Dict[str, Any]]:
    """Run ``pygount --format json`` on ``files`` and return its records.

    The call is best-effort: problems are reported with ``print`` and the
    function still returns whatever records could be parsed.

    Args:
        files: Paths handed to pygount on the command line. An empty list
            short-circuits without starting a process.

    Returns:
        A list of raw pygount JSON objects (one per file). The list is empty
        when there was nothing to analyse, when pygount could not be started
        (for example because it is not installed), or when its output
        contained no parseable JSON.
    """
    if not files:
        return []

    cmd = ["pygount", "--format", "json"] + [str(p) for p in files]
    try:
        proc = subprocess.run(cmd, check=False, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the executable is missing or not runnable, or when the
        # OS rejects the command line; report it like any other failure.
        print(f"pygount could not be started for {len(files)} files: {exc}")
        return []

    if proc.returncode != 0:
        print(f"pygount failed for {len(files)} files: returncode={proc.returncode}")
        print("stderr:\n", proc.stderr)

    # A failing run may still have produced usable output, so always parse.
    return _parse_pygount_output(proc.stdout or "")
|
|
|
|
|
|
# Candidate key names per metric, in priority order.
_PHYSICAL_KEYS = ["lineCount", "raw_total_lines", "n_lines", "lines", "raw_lines", "line_count"]
_CODE_KEYS = ["sourceCount", "codeCount", "code", "n_code", "n_code_lines", "code_lines"]
_COMMENT_KEYS = ["documentationCount", "comment", "n_comment", "n_comment_lines", "comment_lines"]
_BLANK_KEYS = ["emptyCount", "blank", "n_blank", "blank_lines", "empty_count"]


def _first_present(item: Dict[str, Any], keys: List[str], default: Any = 0) -> Any:
    """Return the value of the first key in ``keys`` that is not ``None``."""
    for key in keys:
        value = item.get(key)
        # Compare against None so that a legitimate count of 0 is kept.
        if value is not None:
            return value
    return default


def map_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Normalise one pygount JSON record into a fixed set of fields.

    Several alternative key spellings are accepted for every metric; the
    first one whose value is not ``None`` wins and a missing metric counts
    as ``0``. (The original note says this mirrors
    ``countings_impl._map_pygount_json_item``, which is not visible here.)

    Args:
        item: A raw per-file record as produced by pygount.

    Returns:
        A dict with the keys ``file`` (path with ``\\`` replaced by ``/``),
        ``physical_lines``, ``code_lines``, ``comment_lines``,
        ``blank_lines`` (all ``int``) and ``language`` (``"unknown"`` when
        the record names none).

    Raises:
        ValueError: If a metric value cannot be converted with ``int()``.
    """
    physical = _first_present(item, _PHYSICAL_KEYS)
    code = _first_present(item, _CODE_KEYS)
    comment = _first_present(item, _COMMENT_KEYS)
    blank = _first_present(item, _BLANK_KEYS)

    language = item.get("language") or item.get("languageName") or item.get("lang") or "unknown"
    file_path = (
        item.get("filename")
        or item.get("file")
        or item.get("path")
        or item.get("name")
        or ""
    )

    return {
        # Replace every single backslash so Windows-style paths use '/'.
        "file": str(file_path).replace("\\", "/"),
        "physical_lines": int(physical),
        "code_lines": int(code),
        "comment_lines": int(comment),
        "blank_lines": int(blank),
        "language": language,
    }
|
|
|
|
|
|
def _relative_key(fname: str, base: Path) -> str:
    """Return ``fname`` relative to ``base`` with ``/`` separators."""
    if not fname:
        return fname

    reported = Path(fname)
    try:
        # Lexical attempt: works when both paths are written the same way
        # (both relative, or both absolute).
        relative = reported.relative_to(base)
    except ValueError:
        try:
            # One side is relative and the other absolute (or symlinks are
            # involved): compare the resolved locations instead.
            relative = reported.resolve().relative_to(base.resolve())
        except (ValueError, OSError, RuntimeError):
            # Not located under base at all: keep the reported name.
            return fname.replace("\\", "/")
    return str(relative).replace("\\", "/")


def build_map(parsed: List[Dict[str, Any]], base: Path) -> Dict[str, Dict[str, Any]]:
    """Index pygount records by their path relative to ``base``.

    Keys must be relative to the analysed directory, otherwise the same file
    in two different directories would never share a key. Paths reported by
    pygount are therefore made relative to ``base`` whether they are absolute
    or relative; a path that does not live under ``base`` is kept as
    reported.

    Args:
        parsed: Raw pygount records; entries that are not dicts are skipped.
        base: Directory the files were collected from.

    Returns:
        Mapping of ``/``-separated relative path to the record returned by
        ``map_item``. If two records map to the same key the later one wins.
    """
    result: Dict[str, Dict[str, Any]] = {}
    for item in parsed:
        if not isinstance(item, dict):
            # Skip unexpected output lines.
            continue
        mapped = map_item(item)
        result[_relative_key(mapped["file"], base)] = mapped
    return result
|
|
|
|
|
|
def compare_maps(mapA: Dict[str, Dict[str, Any]], mapB: Dict[str, Dict[str, Any]]):
    """Print a report of how two per-file count mappings differ.

    The report lists how many files exist on only one side (showing at most
    20 names per side), then every shared file whose numeric counts or
    language differ (showing at most 200 of them), and finally a
    confirmation line when no shared file differs.

    Args:
        mapA: Mapping of file key to mapped record for the first directory.
        mapB: Mapping of file key to mapped record for the second directory.

    Returns:
        ``None``; everything is written to standard output.
    """
    numeric_fields = ['physical_lines', 'code_lines', 'comment_lines', 'blank_lines']

    names_a = set(mapA)
    names_b = set(mapB)
    only_in_a = sorted(names_a.difference(names_b))
    only_in_b = sorted(names_b.difference(names_a))
    shared = sorted(names_a.intersection(names_b))

    print(f'Files only in A: {len(only_in_a)}; only in B: {len(only_in_b)}; common: {len(shared)}')
    one_sided = (
        ('\nOnly in A (examples):', only_in_a),
        ('\nOnly in B (examples):', only_in_b),
    )
    for heading, names in one_sided:
        if not names:
            continue
        print(heading)
        for name in names[:20]:
            print(' ', name)

    # Collect every shared file whose counts or detected language differ.
    mismatches = []
    for name in shared:
        rec_a = mapA[name]
        rec_b = mapB[name]
        changed = [field for field in numeric_fields if rec_a.get(field) != rec_b.get(field)]
        language_changed = rec_a.get('language') != rec_b.get('language')
        if changed or language_changed:
            mismatches.append((name, rec_a, rec_b, changed, language_changed))

    print(f'Found {len(mismatches)} differing files (by counting differences)')
    for name, rec_a, rec_b, changed, language_changed in mismatches[:200]:
        print('\n--', name)
        for field in numeric_fields:
            print(f" {field}: A={rec_a.get(field)} B={rec_b.get(field)}")
        if language_changed:
            print(f" language: A={rec_a.get('language')} B={rec_b.get('language')}")
        if changed:
            print(' diff fields:', changed)

    if not mismatches:
        print('No counting differences detected (pygount results matched for all common files).')
|
|
|
|
|
|
def main():
    """Command-line entry point: compare pygount results of two directories.

    Parses ``dirA``, ``dirB`` and the optional ``--ext`` filter, collects the
    files of both directories, runs pygount on each set and prints the
    comparison. Exits with status 2 when either argument is not an existing
    directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dirA')
    parser.add_argument('dirB')
    parser.add_argument('--ext', default=None, help='Comma-separated extensions to check, e.g. .py,.txt (not used by pygount run)')
    args = parser.parse_args()

    a = Path(args.dirA)
    b = Path(args.dirB)
    # A regular file would pass an exists() check but contains nothing to
    # walk, which would end in a misleading "no differences" report.
    if not a.is_dir() or not b.is_dir():
        print('One of the provided dirs does not exist or is not a directory:', a, b)
        sys.exit(2)

    exts = None
    if args.ext:
        # Lower-case the suffixes because collect_files matches them against
        # lower-cased paths.
        exts = [e.strip().lower() for e in args.ext.split(',') if e.strip()]

    filesA = collect_files(a, exts)
    filesB = collect_files(b, exts)

    parsedA = run_pygount_on_files(filesA)
    parsedB = run_pygount_on_files(filesB)

    mapA = build_map(parsedA, a)
    mapB = build_map(parsedB, b)

    compare_maps(mapA, mapB)


if __name__ == '__main__':
    main()
|