"""Run pygount on two directories and compare per-file counting results. Usage: python tools/diagnose_pygount.py [--ext .py,.txt] Output: lists files with differing counting values or missing files. """ import argparse import json import subprocess from pathlib import Path from typing import Dict, Any, List import sys def collect_files(directory: Path, exts=None) -> List[Path]: files: List[Path] = [] for p in directory.rglob("*"): if p.is_file(): if exts: if not any(str(p).lower().endswith(e) for e in exts): continue files.append(p) return files def run_pygount_on_files(files: List[Path]) -> List[Dict[str, Any]]: if not files: return [] cmd = ["pygount", "--format", "json"] + [str(p) for p in files] proc = subprocess.run(cmd, check=False, capture_output=True, text=True) if proc.returncode != 0: print(f"pygount failed for {len(files)} files: returncode={proc.returncode}") print("stderr:\n", proc.stderr) parsed = [] if proc.stdout: text = proc.stdout.strip() if not text: parsed = [] else: try: parsed = json.loads(text) except Exception: # Try parse as newline-separated JSON objects parsed = [] for line in text.splitlines(): line = line.strip() if not line: continue try: parsed.append(json.loads(line)) except Exception: # ignore lines that are not JSON pass # If pygount returned a dict with 'files' key (formatVersion summary), extract it if isinstance(parsed, dict) and parsed.get("files"): parsed = parsed.get("files") return parsed def map_item(item: Dict[str, Any]) -> Dict[str, Any]: # Similar mapping as in countings_impl._map_pygount_json_item physical = ( item.get("lineCount") if item.get("lineCount") is not None else ( item.get("raw_total_lines") if item.get("raw_total_lines") is not None else ( item.get("n_lines") if item.get("n_lines") is not None else ( item.get("lines") if item.get("lines") is not None else ( item.get("raw_lines") if item.get("raw_lines") is not None else ( item.get("line_count") if item.get("line_count") is not None else 0 ) ) ) ) ) ) code = ( item.get("sourceCount") if item.get("sourceCount") is not None else ( item.get("codeCount") if item.get("codeCount") is not None else ( item.get("code") if item.get("code") is not None else ( item.get("n_code") if item.get("n_code") is not None else ( item.get("n_code_lines") if item.get("n_code_lines") is not None else ( item.get("code_lines") if item.get("code_lines") is not None else 0 ) ) ) ) ) ) comment = ( item.get("documentationCount") if item.get("documentationCount") is not None else ( item.get("comment") if item.get("comment") is not None else ( item.get("n_comment") if item.get("n_comment") is not None else ( item.get("n_comment_lines") if item.get("n_comment_lines") is not None else ( item.get("comment_lines") if item.get("comment_lines") is not None else 0 ) ) ) ) ) blank = ( item.get("emptyCount") if item.get("emptyCount") is not None else ( item.get("blank") if item.get("blank") is not None else ( item.get("n_blank") if item.get("n_blank") is not None else ( item.get("blank_lines") if item.get("blank_lines") is not None else ( item.get("empty_count") if item.get("empty_count") is not None else 0 ) ) ) ) ) language = ( item.get("language") or item.get("languageName") or item.get("lang") or "unknown" ) file_path = ( item.get("filename") or item.get("file") or item.get("path") or item.get("name") or "" ) return { "file": str(file_path).replace("\\\\", "/"), "physical_lines": int(physical), "code_lines": int(code), "comment_lines": int(comment), "blank_lines": int(blank), "language": language, } def build_map(parsed: List[Dict[str, Any]], base: Path) -> Dict[str, Dict[str, Any]]: result = {} for item in parsed: if not isinstance(item, dict): # skip unexpected output lines continue mapped = map_item(item) fname = mapped["file"] # If pygount returns absolute paths, make them relative to base when possible p = Path(fname) try: if p.is_absolute(): rel = str(p.relative_to(base)).replace("\\", "/") else: rel = fname.replace("\\", "/") except Exception: rel = fname.replace("\\", "/") result[rel] = mapped return result def compare_maps(mapA: Dict[str, Dict[str, Any]], mapB: Dict[str, Dict[str, Any]]): keysA = set(mapA.keys()) keysB = set(mapB.keys()) onlyA = sorted(keysA - keysB) onlyB = sorted(keysB - keysA) common = sorted(keysA & keysB) print( f"Files only in A: {len(onlyA)}; only in B: {len(onlyB)}; common: {len(common)}" ) if onlyA: print("\nOnly in A (examples):") for p in onlyA[:20]: print(" ", p) if onlyB: print("\nOnly in B (examples):") for p in onlyB[:20]: print(" ", p) diffs = [] for k in common: a = mapA[k] b = mapB[k] # Compare relevant numeric fields fields = ["physical_lines", "code_lines", "comment_lines", "blank_lines"] diff_fields = [f for f in fields if a.get(f) != b.get(f)] lang_diff = a.get("language") != b.get("language") if diff_fields or lang_diff: diffs.append((k, a, b, diff_fields, lang_diff)) print(f"Found {len(diffs)} differing files (by counting differences)") for k, a, b, diff_fields, lang_diff in diffs[:200]: print("\n--", k) for f in ["physical_lines", "code_lines", "comment_lines", "blank_lines"]: print(f" {f}: A={a.get(f)} B={b.get(f)}") if lang_diff: print(f" language: A={a.get('language')} B={b.get('language')}") if diff_fields: print(" diff fields:", diff_fields) if not diffs: print( "No counting differences detected (pygount results matched for all common files)." ) def main(): parser = argparse.ArgumentParser() parser.add_argument("dirA") parser.add_argument("dirB") parser.add_argument( "--ext", default=None, help="Comma-separated extensions to check, e.g. .py,.txt (not used by pygount run)", ) args = parser.parse_args() a = Path(args.dirA) b = Path(args.dirB) if not a.exists() or not b.exists(): print("One of the provided dirs does not exist:", a, b) sys.exit(2) exts = None if args.ext: exts = [e.strip().lower() for e in args.ext.split(",") if e.strip()] filesA = collect_files(a, exts) filesB = collect_files(b, exts) parsedA = run_pygount_on_files(filesA) parsedB = run_pygount_on_files(filesB) mapA = build_map(parsedA, a) mapB = build_map(parsedB, b) compare_maps(mapA, mapB) if __name__ == "__main__": main()