# S1005403_RisCC/tools/extract_pdf_commands.py

import sys
from pathlib import Path
# The PDF to scan; the relative path is resolved against the current
# working directory.
p = Path('SUM7056227 Rev. A.pdf')
if not p.exists():
    # Without the input file there is nothing to do; exit status 2 marks
    # this specific failure.
    print('PDF not found at', p)
    raise SystemExit(2)

# Open the PDF with the first library that is both importable and able to
# read the file: pypdf is tried first, PyPDF2 is the fallback.
#
# Importing and opening are handled separately so the final diagnostic can
# tell "no library installed" apart from "a library is installed but could
# not open this file". Both cases still exit with status 3.
reader = None
_reader_lib_found = False  # True once any supported library has imported
_reader_error = None       # most relevant failure, reported if nothing works

try:
    from pypdf import PdfReader
except Exception as e:
    # Broad on purpose: a missing package and a broken installation both
    # mean the library is unusable, and the fallback should still be tried.
    _reader_error = e
else:
    _reader_lib_found = True
    try:
        reader = PdfReader(str(p))
    except Exception as e:
        _reader_error = e

if reader is None:
    try:
        import PyPDF2
    except Exception as e:
        # Keep the earlier (more informative) open error if pypdf was
        # importable; otherwise report this import failure.
        if not _reader_lib_found:
            _reader_error = e
    else:
        _reader_lib_found = True
        try:
            # Attribute lookup stays inside the try: a PyPDF2 module that
            # lacks the PdfReader name raises AttributeError here.
            reader = PyPDF2.PdfReader(str(p))
        except Exception as e:
            _reader_error = e

if reader is None:
    if _reader_lib_found:
        print('Failed to open PDF', p, 'with the installed PDF reader:', _reader_error)
    else:
        print('No suitable PDF reader installed:', _reader_error)
    sys.exit(3)

# Extract the text of every page. Extraction is best-effort: a page that
# fails (or yields no text) contributes an empty string so the remaining
# pages are still processed, but the failure is reported on stderr so a
# missing keyword hit can be traced back to an unreadable page.
text = []
for page_no, pg in enumerate(reader.pages, start=1):
    try:
        # extract_text() may return a falsy value; normalise it to ''.
        t = pg.extract_text() or ''
    except Exception as e:
        print('ERROR extracting page', page_no, e, file=sys.stderr)
        t = ''
    text.append(t)

# Single string with one newline between consecutive pages; the keyword
# search below operates on this.
full = '\n'.join(text)
# Search the extracted text for command-related keywords and print a window
# of context around the first occurrence of each keyword that is present.
keywords = ['tgtinit', 'tgtset', 'tgtreset', 'command', 'parameters', 'format']
# Lowercase once rather than once per keyword. Offsets found in the lowered
# copy are applied to `full`; this assumes lowercasing does not change the
# length of the text (true for ASCII content).
full_lower = full.lower()
found = False
for kw in keywords:
    idx = full_lower.find(kw)
    if idx == -1:
        continue
    found = True
    # Context window: up to 200 characters before the match and 400 from its
    # start, clamped to the bounds of the text.
    start = max(0, idx - 200)
    end = min(len(full), idx + 400)
    ctx = full[start:end]
    print('\n--- context around "{}" ---\n'.format(kw))
    print(ctx)

if not found:
    # Fallback: print the first 3000 chars for manual inspection.
    print('\n--- No keywords found; printing first 3000 chars of PDF text ---\n')
    print(full[:3000])
    # This exit belongs to the fallback branch; it must be indented here so
    # the `else` below pairs with `if not found`.
    sys.exit(0)
else:
    # Also print the specific pages around TOC entries (38-41) for clarity.
    print('\n--- Explicitly printing pages 38-41 ---\n')
    # Zero-based indices 37..40, clamped to the number of pages available.
    for pi in range(max(0, 38 - 1), min(len(reader.pages), 41)):
        print(f'--- PAGE {pi+1} ---\n')
        try:
            print(reader.pages[pi].extract_text() or '')
        except Exception as e:
            print('ERROR extracting page', pi + 1, e)
    sys.exit(0)