"""Run pygount on two directories and compare per-file counting results.

Usage:
    python tools/diagnose_pygount.py <dirA> <dirB> [--ext .py,.txt]

Output: lists files with differing counting values or missing files.
"""

import argparse
import json
import subprocess
from pathlib import Path
from typing import Dict, Any, List
import sys


def collect_files(directory: Path, exts=None) -> List[Path]:
    """Recursively gather the regular files found under ``directory``.

    Args:
        directory: Root of the tree to walk.
        exts: Optional collection of suffixes. When non-empty, a file is kept
            only if its full path, lower-cased, ends with one of them, so the
            suffixes themselves are expected in lower case. ``None`` or an
            empty collection keeps every file.

    Returns:
        The matching paths, in the order ``Path.rglob`` yields them.
    """
    wanted = tuple(exts) if exts else ()

    def keep(candidate: Path) -> bool:
        # Without a filter every regular file qualifies.
        return not wanted or str(candidate).lower().endswith(wanted)

    return [
        entry
        for entry in directory.rglob("*")
        if entry.is_file() and keep(entry)
    ]


def _records_from_json(value: Any) -> List[Any]:
    """Normalise one decoded JSON value to a list of per-file records."""
    if isinstance(value, list):
        return value
    if isinstance(value, dict):
        if "files" in value:
            # Summary document: the per-file records live under "files".
            files = value["files"]
            return files if isinstance(files, list) else []
        # A lone object is treated as a single per-file record.
        return [value]
    # null / numbers / strings carry no file records.
    return []


def run_pygount_on_files(files: List[Path]) -> List[Dict[str, Any]]:
    """Run ``pygount --format json`` on ``files`` and return its file records.

    A non-zero exit status is reported on stdout, but whatever pygount wrote
    to its standard output is still parsed (best effort). The output is first
    decoded as a single JSON document; if that fails, every line is decoded
    on its own and lines that are not valid JSON are skipped.

    Args:
        files: Paths handed to pygount as positional arguments.

    Returns:
        A list of decoded records; always a list, empty when ``files`` is
        empty or when no usable JSON was produced.

    Raises:
        FileNotFoundError: If the ``pygount`` executable cannot be found.
    """
    if not files:
        return []
    cmd = ["pygount", "--format", "json"] + [str(p) for p in files]
    proc = subprocess.run(cmd, check=False, capture_output=True, text=True)
    if proc.returncode != 0:
        print(f"pygount failed for {len(files)} files: returncode={proc.returncode}")
        print("stderr:\n", proc.stderr)

    text = (proc.stdout or "").strip()
    if not text:
        return []

    try:
        return _records_from_json(json.loads(text))
    except json.JSONDecodeError:
        # Not one JSON document; fall back to newline-separated JSON below.
        pass

    records: List[Any] = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        try:
            decoded = json.loads(line)
        except json.JSONDecodeError:
            # Ignore lines that are not JSON (e.g. warnings mixed into stdout).
            continue
        records.extend(_records_from_json(decoded))
    return records


# Candidate keys per metric, in priority order. The first key whose value is
# not None wins, so a legitimate count of 0 is never skipped.
_PHYSICAL_KEYS = (
    "lineCount",
    "raw_total_lines",
    "n_lines",
    "lines",
    "raw_lines",
    "line_count",
)
_CODE_KEYS = (
    "sourceCount",
    "codeCount",
    "code",
    "n_code",
    "n_code_lines",
    "code_lines",
)
_COMMENT_KEYS = (
    "documentationCount",
    "comment",
    "n_comment",
    "n_comment_lines",
    "comment_lines",
)
_BLANK_KEYS = (
    "emptyCount",
    "blank",
    "n_blank",
    "blank_lines",
    "empty_count",
)


def _first_present(item: Dict[str, Any], keys, default: Any = 0) -> Any:
    """Return the value of the first key in ``keys`` that is not None."""
    for key in keys:
        value = item.get(key)
        if value is not None:
            return value
    return default


def map_item(item: Dict[str, Any]) -> Dict[str, Any]:
    """Translate one pygount JSON record into a normalised counting dict.

    Several alternative key spellings are accepted for every metric; a metric
    that is absent under all of its spellings counts as 0. Intended to mirror
    ``countings_impl._map_pygount_json_item`` (not visible from this file).

    Args:
        item: A decoded per-file record.

    Returns:
        A dict with the keys ``file`` (path with backslashes replaced by
        forward slashes), ``physical_lines``, ``code_lines``,
        ``comment_lines``, ``blank_lines`` (all ``int``) and ``language``
        (``"unknown"`` when not reported).
    """
    language = (
        item.get("language")
        or item.get("languageName")
        or item.get("lang")
        or "unknown"
    )
    file_path = (
        item.get("filename")
        or item.get("file")
        or item.get("path")
        or item.get("name")
        or ""
    )
    return {
        # "\\" is a single backslash: normalise Windows separators.
        "file": str(file_path).replace("\\", "/"),
        "physical_lines": int(_first_present(item, _PHYSICAL_KEYS)),
        "code_lines": int(_first_present(item, _CODE_KEYS)),
        "comment_lines": int(_first_present(item, _COMMENT_KEYS)),
        "blank_lines": int(_first_present(item, _BLANK_KEYS)),
        "language": language,
    }


def _relative_key(fname: str, base: Path, base_resolved) -> str:
    """Return ``fname`` relative to ``base`` with forward slashes if possible.

    ``base_resolved`` is ``base.resolve()`` computed once by the caller, or
    ``None`` when resolving failed.
    """
    normalized = fname.replace("\\", "/")
    if not normalized:
        # Records without a path keep the empty key.
        return normalized
    path = Path(normalized)
    try:
        # Works when the path and base are spelled the same way, e.g. both
        # relative ("dirA/sub/x.py" under "dirA") or both absolute.
        return str(path.relative_to(base)).replace("\\", "/")
    except ValueError:
        pass
    if base_resolved is not None:
        try:
            # Mixed spellings (absolute path vs. relative base, "./" prefixes,
            # symlinks): compare the fully resolved forms instead.
            return str(path.resolve().relative_to(base_resolved)).replace("\\", "/")
        except (ValueError, OSError, RuntimeError):
            pass
    return normalized


def build_map(parsed: List[Dict[str, Any]], base: Path) -> Dict[str, Dict[str, Any]]:
    """Index mapped pygount records by their path relative to ``base``.

    Keys must be relative to ``base`` so that the same file in two different
    trees gets the same key; paths that cannot be expressed relative to
    ``base`` are kept as reported (with forward slashes).

    Args:
        parsed: Decoded pygount records; entries that are not dicts are
            skipped.
        base: Directory the reported paths should be made relative to.

    Returns:
        Mapping of relative path to the record produced by ``map_item``. If
        several records map to the same key, the last one wins.
    """
    try:
        base_resolved = base.resolve()
    except (OSError, RuntimeError):
        base_resolved = None

    result = {}
    for item in parsed:
        if not isinstance(item, dict):
            # skip unexpected output lines
            continue
        mapped = map_item(item)
        result[_relative_key(mapped["file"], base, base_resolved)] = mapped
    return result


def compare_maps(mapA: Dict[str, Dict[str, Any]], mapB: Dict[str, Dict[str, Any]]):
    """Print a report of the differences between two per-file counting maps.

    The report lists up to 20 files present on only one side, then up to 200
    common files whose line counts or detected language differ, or a
    confirmation line when no common file differs.

    Args:
        mapA: Relative path -> counting record for the first tree.
        mapB: Relative path -> counting record for the second tree.
    """
    count_fields = ("physical_lines", "code_lines", "comment_lines", "blank_lines")

    names_a, names_b = set(mapA), set(mapB)
    exclusive_a = sorted(names_a - names_b)
    exclusive_b = sorted(names_b - names_a)
    shared = sorted(names_a & names_b)

    print(
        f"Files only in A: {len(exclusive_a)}; only in B: {len(exclusive_b)}; common: {len(shared)}"
    )
    one_sided = (
        ("\nOnly in A (examples):", exclusive_a),
        ("\nOnly in B (examples):", exclusive_b),
    )
    for heading, names in one_sided:
        if not names:
            continue
        print(heading)
        for name in names[:20]:
            print("  ", name)

    # Collect every common file whose counts or language disagree.
    mismatches = []
    for name in shared:
        rec_a, rec_b = mapA[name], mapB[name]
        changed = [
            field for field in count_fields if rec_a.get(field) != rec_b.get(field)
        ]
        language_changed = rec_a.get("language") != rec_b.get("language")
        if changed or language_changed:
            mismatches.append((name, rec_a, rec_b, changed, language_changed))

    print(f"Found {len(mismatches)} differing files (by counting differences)")
    for name, rec_a, rec_b, changed, language_changed in mismatches[:200]:
        print("\n--", name)
        for field in count_fields:
            print(f"  {field}: A={rec_a.get(field)} B={rec_b.get(field)}")
        if language_changed:
            print(f"  language: A={rec_a.get('language')} B={rec_b.get('language')}")
        if changed:
            print("  diff fields:", changed)

    if not mismatches:
        print(
            "No counting differences detected (pygount results matched for all common files)."
        )


def main():
    """Parse the command line, count both trees with pygount, print the diff.

    Exits with status 2 when either positional argument is not an existing
    directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("dirA")
    parser.add_argument("dirB")
    parser.add_argument(
        "--ext",
        default=None,
        help=(
            "Comma-separated extensions to check, e.g. .py,.txt "
            "(only files with these suffixes are passed to pygount)"
        ),
    )
    args = parser.parse_args()

    a = Path(args.dirA)
    b = Path(args.dirB)
    # is_dir() rather than exists(): a regular file would be walked as an
    # empty tree and produce a misleading "no differences" report.
    if not a.is_dir() or not b.is_dir():
        print("Both arguments must be existing directories:", a, b, file=sys.stderr)
        sys.exit(2)

    exts = None
    if args.ext:
        # Lower-cased because collect_files matches against lower-cased paths.
        exts = [e.strip().lower() for e in args.ext.split(",") if e.strip()]

    filesA = collect_files(a, exts)
    filesB = collect_files(b, exts)

    parsedA = run_pygount_on_files(filesA)
    parsedB = run_pygount_on_files(filesB)

    mapA = build_map(parsedA, a)
    mapB = build_map(parsedB, b)

    compare_maps(mapA, mapB)


# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()