"""Dump text from a PDF around a set of keywords, plus a fixed page range.

Exit codes:
    0 -- text was extracted and printed.
    2 -- the PDF file does not exist.
    3 -- no PDF library could be imported, or none could open the file.
"""

import importlib
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Document examined when the script is run without further configuration.
PDF_PATH = Path('SUM7056227 Rev. A.pdf')

# Libraries tried in order; each is expected to expose a ``PdfReader`` class.
_READER_MODULES = ('pypdf', 'PyPDF2')

# Case-insensitive search terms; only the first hit of each is reported.
KEYWORDS = ['tgtinit', 'tgtset', 'tgtreset', 'command', 'parameters', 'format']

# Characters of context shown before / after the start of a keyword hit.
CONTEXT_BEFORE = 200
CONTEXT_AFTER = 400

# Size of the raw dump printed when no keyword matches at all.
FALLBACK_CHARS = 3000

# 1-based, inclusive page range that is always dumped when a keyword matched
# (the pages around the TOC entries of interest).
FIRST_PAGE = 38
LAST_PAGE = 41


def _open_reader(pdf_path: Path) -> Tuple[Optional[Any], Optional[str]]:
    """Open *pdf_path* with the first library able to read it.

    Returns ``(reader, None)`` on success, or ``(None, message)`` when every
    candidate library is either missing or failed to open the file.
    """
    import_error = None  # type: Optional[Exception]
    open_error = None  # type: Optional[Exception]
    for module_name in _READER_MODULES:
        try:
            reader_cls = importlib.import_module(module_name).PdfReader
        except (ImportError, AttributeError) as exc:
            # Library absent, or too old to provide ``PdfReader``.
            import_error = exc
            continue
        try:
            return reader_cls(str(pdf_path)), None
        except Exception as exc:
            # Each library raises its own error types for unreadable files;
            # remember the failure and give the next library a chance.
            open_error = exc
    if open_error is not None:
        # A library was available, so the problem is the file, not the
        # installation -- say so instead of blaming a missing dependency.
        return None, f'Failed to open PDF: {open_error}'
    return None, f'No suitable PDF reader installed: {import_error}'


def _extract_pages(reader: Any) -> Tuple[List[str], Dict[int, Exception]]:
    """Extract text from every page of *reader*.

    Returns the per-page text (``''`` for pages that yielded nothing or
    failed) and a mapping of 0-based page index to the extraction error.
    """
    texts = []  # type: List[str]
    errors = {}  # type: Dict[int, Exception]
    for index, page in enumerate(reader.pages):
        try:
            texts.append(page.extract_text() or '')
        except Exception as exc:
            # Best effort: one bad page must not abort the whole dump.
            texts.append('')
            errors[index] = exc
    return texts, errors


def _keyword_context(text: str, keyword: str) -> Optional[str]:
    """Return the text surrounding the first case-insensitive hit of *keyword*.

    Returns ``None`` when *keyword* does not occur in *text*.
    """
    # Search the original text rather than ``text.lower()``: lower-casing can
    # change the string length, which would misalign the slice indices.
    match = re.search(re.escape(keyword), text, re.IGNORECASE)
    if match is None:
        return None
    start = max(0, match.start() - CONTEXT_BEFORE)
    end = min(len(text), match.start() + CONTEXT_AFTER)
    return text[start:end]


def main(pdf_path: Path = PDF_PATH) -> int:
    """Print keyword contexts and selected pages of *pdf_path*.

    Args:
        pdf_path: PDF to inspect; defaults to ``PDF_PATH``.

    Returns:
        The process exit code (see the module docstring).
    """
    if not pdf_path.exists():
        print('PDF not found at', pdf_path)
        return 2

    reader, problem = _open_reader(pdf_path)
    if reader is None:
        print(problem)
        return 3

    page_texts, page_errors = _extract_pages(reader)
    full = '\n'.join(page_texts)

    found = False
    for keyword in KEYWORDS:
        context = _keyword_context(full, keyword)
        if context is None:
            continue
        found = True
        print('\n--- context around "{}" ---\n'.format(keyword))
        print(context)

    if not found:
        # Fallback: print the beginning of the text for manual inspection.
        print('\n--- No keywords found; printing first 3000 chars of PDF text ---\n')
        print(full[:FALLBACK_CHARS])
        return 0

    print(f'\n--- Explicitly printing pages {FIRST_PAGE}-{LAST_PAGE} ---\n')
    first_index = max(0, FIRST_PAGE - 1)
    stop_index = min(len(page_texts), LAST_PAGE)
    for index in range(first_index, stop_index):
        print(f'--- PAGE {index + 1} ---\n')
        if index in page_errors:
            print('ERROR extracting page', index + 1, page_errors[index])
        else:
            # Reuse the text gathered above instead of extracting again.
            print(page_texts[index])
    return 0


if __name__ == '__main__':
    sys.exit(main())