61 lines
1.6 KiB
Python
61 lines
1.6 KiB
Python
"""Search a PDF for command-related keywords and print the surrounding text.

The script opens a PDF (by default ``SUM7056227 Rev. A.pdf`` in the current
directory), extracts the text of every page, prints a window of text around
the first occurrence of each keyword, and then prints pages 38-41 in full.
If no keyword occurs at all, the first 3000 characters of the extracted text
are printed instead so the output can be inspected manually.

Exit codes: 0 on success, 2 if the PDF file does not exist, 3 if the PDF
could not be opened with any of the supported libraries.
"""
import re
import sys
from pathlib import Path

DEFAULT_PDF = Path("SUM7056227 Rev. A.pdf")
KEYWORDS = ("tgtinit", "tgtset", "tgtreset", "command", "parameters", "format")
CONTEXT_BEFORE = 200   # characters shown before a keyword hit
CONTEXT_AFTER = 400    # characters shown from the start of a keyword hit
FALLBACK_CHARS = 3000  # characters printed when no keyword is found
FIRST_PAGE = 38        # 1-based, inclusive
LAST_PAGE = 41         # 1-based, inclusive


def open_reader(pdf_path):
    """Open *pdf_path* with the first PDF library that can read it.

    ``pypdf`` is tried first, then ``PyPDF2``. A library that is installed
    but fails to open the file does not stop the next one from being tried.

    Raises:
        ImportError: none of the libraries could be imported.
        RuntimeError: at least one library was imported, but none of them
            could open the file.
    """
    import importlib

    import_errors = []
    read_errors = []
    for module_name in ("pypdf", "PyPDF2"):
        try:
            module = importlib.import_module(module_name)
        except ImportError as exc:
            import_errors.append(f"{module_name}: {exc}")
            continue
        try:
            # Attribute lookup is inside the try: an installed module that
            # has no ``PdfReader`` attribute is treated as a read failure.
            return module.PdfReader(str(pdf_path))
        except Exception as exc:  # parse errors are library-specific
            read_errors.append(f"{module_name}: {exc}")

    # Keep "library missing" and "file unreadable" apart so the message
    # printed by the caller points at the real problem.
    if read_errors:
        raise RuntimeError("; ".join(read_errors))
    raise ImportError("; ".join(import_errors))


def extract_page_texts(pages):
    """Extract the text of every page, tolerating per-page failures.

    Args:
        pages: iterable of objects exposing ``extract_text()``.

    Returns:
        ``(texts, errors)``: ``texts`` holds one string per page (empty when
        extraction returned nothing or raised); ``errors`` maps the 0-based
        index of each page whose extraction raised to the exception.
    """
    texts = []
    errors = {}
    for index, page in enumerate(pages):
        try:
            texts.append(page.extract_text() or "")
        except Exception as exc:  # best effort: one bad page must not abort
            texts.append("")
            errors[index] = exc
    return texts, errors


def find_keyword_contexts(full, keywords, before=CONTEXT_BEFORE,
                          after=CONTEXT_AFTER):
    """Return ``(keyword, context)`` for the first hit of each keyword.

    Matching is case-insensitive and runs against *full* itself rather than
    a lowered copy: ``str.lower()`` can change the length of a string, which
    would shift the slice away from the actual hit.

    Args:
        full: the text to search.
        keywords: literal strings to look for; order is preserved.
        before: number of characters of context before the hit.
        after: number of characters of context counted from the hit start.

    Returns:
        A list with one tuple per keyword that occurs in *full*.
    """
    hits = []
    for keyword in keywords:
        match = re.search(re.escape(keyword), full, re.IGNORECASE)
        if match is None:
            continue
        hit_at = match.start()
        window_start = max(0, hit_at - before)
        # Slicing clamps the upper bound to len(full) on its own.
        hits.append((keyword, full[window_start:hit_at + after]))
    return hits


def main(argv=None):
    """Run the search and return the process exit code.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. An optional first argument overrides the PDF
            path; without it the built-in default file is used.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    pdf_path = Path(args[0]) if args else DEFAULT_PDF

    if not pdf_path.exists():
        print("PDF not found at", pdf_path)
        return 2

    # Try multiple PDF libraries
    try:
        reader = open_reader(pdf_path)
    except ImportError as exc:
        print("No suitable PDF reader installed:", exc)
        return 3
    except RuntimeError as exc:
        print("Could not read PDF with any installed library:", exc)
        return 3

    texts, errors = extract_page_texts(reader.pages)
    full = "\n".join(texts)

    # search for relevant keywords
    hits = find_keyword_contexts(full, KEYWORDS)
    if not hits:
        # fallback: print the start of the text for manual inspection
        print(
            f"\n--- No keywords found; printing first {FALLBACK_CHARS}"
            " chars of PDF text ---\n"
        )
        print(full[:FALLBACK_CHARS])
        return 0

    for keyword, context in hits:
        print('\n--- context around "{}" ---\n'.format(keyword))
        print(context)

    # Also print the specific pages around TOC entries (38-41) for clarity.
    # The text extracted above is reused instead of extracting a second time.
    print(f"\n--- Explicitly printing pages {FIRST_PAGE}-{LAST_PAGE} ---\n")
    for page_index in range(max(0, FIRST_PAGE - 1), min(len(texts), LAST_PAGE)):
        print(f"--- PAGE {page_index + 1} ---\n")
        if page_index in errors:
            print("ERROR extracting page", page_index + 1, errors[page_index])
        else:
            print(texts[page_index])
    return 0


if __name__ == "__main__":
    sys.exit(main())