"""Dump keyword contexts and selected pages from a PDF document.

The script extracts the text of every page, prints a window of text around
the first occurrence of each keyword in ``KEYWORDS`` and then prints pages
``FIRST_PAGE``..``LAST_PAGE`` in full.  When no keyword is found, the first
``FALLBACK_CHARS`` characters of the extracted text are printed instead.

Usage: run with no arguments to read ``DEFAULT_PDF``, or pass a PDF path as
the first command-line argument.

Exit codes: 0 on success, 2 when the PDF file does not exist, 3 when no PDF
library is installed or none of the installed ones can open the file.
"""

import importlib
import re
import sys
from pathlib import Path

DEFAULT_PDF = Path("SUM7056227 Rev. A.pdf")
KEYWORDS = ("tgtinit", "tgtset", "tgtreset", "command", "parameters", "format")
CONTEXT_BEFORE = 200  # characters printed before a keyword hit
CONTEXT_AFTER = 400  # characters printed from the start of a keyword hit
FALLBACK_CHARS = 3000  # size of the dump when no keyword matches
FIRST_PAGE = 38  # 1-based, inclusive
LAST_PAGE = 41  # 1-based, inclusive
READER_MODULES = ("pypdf", "PyPDF2")  # tried in this order


def open_reader(pdf_path):
    """Return a ``PdfReader`` for *pdf_path* from the first usable library.

    Each module in ``READER_MODULES`` is tried in order; a library that is
    installed but cannot open the file does not stop the next one from
    being tried.

    Raises:
        ImportError: none of the libraries is installed.
        RuntimeError: at least one library is installed, but every installed
            one failed to open the file.
    """
    import_errors = []
    open_errors = []
    for module_name in READER_MODULES:
        try:
            module = importlib.import_module(module_name)
        except ImportError as exc:
            import_errors.append(exc)
            continue
        try:
            return module.PdfReader(str(pdf_path))
        except Exception as exc:
            # The exception types raised for unreadable files differ between
            # libraries and versions, so the catch is deliberately broad.
            open_errors.append(f"{module_name}: {exc}")
    if open_errors:
        raise RuntimeError("; ".join(open_errors))
    raise import_errors[-1]


def extract_page_texts(reader):
    """Extract the text of every page in *reader*.

    Returns:
        A ``(texts, failures)`` pair.  ``texts`` has one string per page
        (empty for pages without text or whose extraction failed) and
        ``failures`` maps the 0-based index of each failed page to the
        exception that was raised.
    """
    texts = []
    failures = {}
    for index, page in enumerate(reader.pages):
        try:
            texts.append(page.extract_text() or "")
        except Exception as exc:
            # Best effort: keep going, but do not hide the failure.
            failures[index] = exc
            texts.append("")
            print(
                f"warning: could not extract page {index + 1}: {exc}",
                file=sys.stderr,
            )
    return texts, failures


def find_keyword_contexts(full_text, keywords):
    """Return ``(keyword, context)`` for each keyword found in *full_text*.

    Matching is case-insensitive and only the first occurrence of each
    keyword is reported.  The context runs from ``CONTEXT_BEFORE`` characters
    before the hit to ``CONTEXT_AFTER`` characters after its start, clipped
    to the bounds of the text.
    """
    hits = []
    for keyword in keywords:
        # Search the original text rather than a lower-cased copy: lowering
        # can change the string length, which would misplace the window.
        match = re.search(re.escape(keyword), full_text, re.IGNORECASE)
        if match is None:
            continue
        start = max(0, match.start() - CONTEXT_BEFORE)
        end = min(len(full_text), match.start() + CONTEXT_AFTER)
        hits.append((keyword, full_text[start:end]))
    return hits


def main(argv=None):
    """Run the dump and return the process exit code.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first argument, if present, is the PDF
            path; otherwise ``DEFAULT_PDF`` is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    pdf_path = Path(args[0]) if args else DEFAULT_PDF

    if not pdf_path.exists():
        print("PDF not found at", pdf_path)
        return 2

    try:
        reader = open_reader(pdf_path)
    except ImportError as exc:
        print("No suitable PDF reader installed:", exc)
        return 3
    except RuntimeError as exc:
        print("Failed to open PDF:", exc)
        return 3

    page_texts, failures = extract_page_texts(reader)
    full_text = "\n".join(page_texts)

    hits = find_keyword_contexts(full_text, KEYWORDS)
    if not hits:
        # Fallback: print the beginning of the text for manual inspection.
        print(
            f"\n--- No keywords found; printing first {FALLBACK_CHARS} "
            "chars of PDF text ---\n"
        )
        print(full_text[:FALLBACK_CHARS])
        return 0

    for keyword, context in hits:
        print('\n--- context around "{}" ---\n'.format(keyword))
        print(context)

    # Also print the specific pages around the TOC entries for clarity.
    print(f"\n--- Explicitly printing pages {FIRST_PAGE}-{LAST_PAGE} ---\n")
    first_index = max(0, FIRST_PAGE - 1)
    stop_index = min(len(page_texts), LAST_PAGE)
    for index in range(first_index, stop_index):
        print(f"--- PAGE {index + 1} ---\n")
        if index in failures:
            print("ERROR extracting page", index + 1, failures[index])
        else:
            print(page_texts[index])
    return 0


if __name__ == "__main__":
    sys.exit(main())