# S1005403_RisCC/tools/extract_pdf_commands.py

import re
import sys
from pathlib import Path

# Default input document; a different path may be passed as the first
# command-line argument.
DEFAULT_PDF = Path("SUM7056227 Rev. A.pdf")

# Terms whose first (case-insensitive) occurrence is printed with context.
KEYWORDS = ("tgtinit", "tgtset", "tgtreset", "command", "parameters", "format")

# Number of characters shown before / after the start of each keyword hit.
CONTEXT_BEFORE = 200
CONTEXT_AFTER = 400

# Size of the raw text dump printed when no keyword is found.
FALLBACK_CHARS = 3000

# 1-based, inclusive page range that is dumped in full after a keyword hit.
FIRST_PAGE = 38
LAST_PAGE = 41

# Process exit codes.
EXIT_PDF_MISSING = 2
EXIT_NO_READER = 3


def open_reader(pdf_path):
    """Open *pdf_path* with pypdf, falling back to PyPDF2.

    Each backend is tried in turn; any failure (package missing, class not
    available, file unreadable) moves on to the next backend.

    Returns:
        The reader object of the first backend that succeeds, or ``None``
        after reporting every backend's failure on stderr.
    """
    failures = []
    try:
        from pypdf import PdfReader

        return PdfReader(str(pdf_path))
    except Exception as exc:
        failures.append(("pypdf", exc))
    try:
        import PyPDF2

        return PyPDF2.PdfReader(str(pdf_path))
    except Exception as exc:
        failures.append(("PyPDF2", exc))

    # Distinguish "nothing installed" from "installed but could not open the
    # file" so the message does not send the user chasing the wrong problem.
    if all(isinstance(exc, ImportError) for _, exc in failures):
        print("No suitable PDF reader installed:", file=sys.stderr)
    else:
        print("Could not open", pdf_path, "with any PDF reader:", file=sys.stderr)
    for backend, exc in failures:
        print(f"  {backend}: {exc!r}", file=sys.stderr)
    return None


def extract_page_texts(reader):
    """Extract the text of every page of *reader*, best effort.

    Returns:
        A list with one ``(text, error)`` tuple per page, in page order.
        ``text`` is ``""`` when extraction returned nothing or raised;
        ``error`` is the raised exception, or ``None`` on success.
    """
    results = []
    for page in reader.pages:
        try:
            results.append((page.extract_text() or "", None))
        except Exception as exc:
            # One unreadable page must not abort the whole run.
            results.append(("", exc))
    return results


def find_keyword_contexts(
    full_text, keywords=KEYWORDS, before=CONTEXT_BEFORE, after=CONTEXT_AFTER
):
    """Return the context around the first occurrence of each keyword.

    The search is case-insensitive and runs on *full_text* itself, so the
    match offset is always a valid offset into *full_text* (lower-casing a
    copy first can change its length and misalign the slice).

    Args:
        full_text: Text to search.
        keywords: Literal terms to look for.
        before: Characters of context to include before the match start.
        after: Characters of context to include from the match start onward.

    Returns:
        A list of ``(keyword, context)`` tuples, in *keywords* order, holding
        only the keywords that were found.
    """
    contexts = []
    for keyword in keywords:
        match = re.search(re.escape(keyword), full_text, re.IGNORECASE)
        if match is None:
            continue
        start = max(0, match.start() - before)
        # Slicing clamps the upper bound to the end of the text.
        contexts.append((keyword, full_text[start:match.start() + after]))
    return contexts


def page_indices(page_count, first_page, last_page):
    """Return zero-based indices for a 1-based inclusive page range.

    The range is clamped to ``[0, page_count)``; it is empty when the
    document ends before *first_page*.
    """
    return range(max(0, first_page - 1), min(page_count, last_page))


def main(argv=None):
    """Print keyword contexts (or a raw text dump) for a PDF.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. An optional first argument overrides the
            default PDF path.

    Returns:
        ``0`` on success, ``2`` if the PDF is missing, ``3`` if no PDF
        backend could open it.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    pdf_path = Path(args[0]) if args else DEFAULT_PDF
    if not pdf_path.is_file():
        print("PDF not found at", pdf_path, file=sys.stderr)
        return EXIT_PDF_MISSING

    reader = open_reader(pdf_path)
    if reader is None:
        return EXIT_NO_READER

    pages = extract_page_texts(reader)
    for number, (_, error) in enumerate(pages, start=1):
        if error is not None:
            print(
                f"WARNING: no text extracted from page {number}: {error!r}",
                file=sys.stderr,
            )

    full_text = "\n".join(text for text, _ in pages)
    contexts = find_keyword_contexts(full_text)
    if not contexts:
        # Fallback: show the beginning of the text for manual inspection.
        print(
            f"\n--- No keywords found; printing first {FALLBACK_CHARS} "
            "chars of PDF text ---\n"
        )
        print(full_text[:FALLBACK_CHARS])
        return 0

    for keyword, context in contexts:
        print(f'\n--- context around "{keyword}" ---\n')
        print(context)

    # Also print the pages around the TOC entries in full, reusing the text
    # that was already extracted above.
    print(f"\n--- Explicitly printing pages {FIRST_PAGE}-{LAST_PAGE} ---\n")
    for index in page_indices(len(pages), FIRST_PAGE, LAST_PAGE):
        text, error = pages[index]
        print(f"--- PAGE {index + 1} ---\n")
        if error is None:
            print(text)
        else:
            print("ERROR extracting page", index + 1, error)
    return 0


if __name__ == "__main__":
    sys.exit(main())