58 lines
1.6 KiB
Python
58 lines
1.6 KiB
Python
"""Dump the passages of a PDF manual that mention target-command keywords.

The script extracts the text of every page, prints a window of text around
the first occurrence of each keyword and then prints pages 38-41 in full.
When no keyword is found, the first 3000 characters of the extracted text
are printed instead so the document can be inspected manually.

Usage: run without arguments to read the default PDF, or pass a PDF path as
the first command-line argument.

Exit codes: 0 on success, 2 when the PDF file does not exist, 3 when no PDF
library could be imported or none of them could open the file.
"""
import importlib
import re
import sys
from pathlib import Path

DEFAULT_PDF = Path('SUM7056227 Rev. A.pdf')

# Libraries are tried in this order; both expose a ``PdfReader`` class.
READER_MODULES = ('pypdf', 'PyPDF2')

KEYWORDS = ['tgtinit', 'tgtset', 'tgtreset', 'command', 'parameters', 'format']

# Characters of context printed before / after the start of a keyword hit.
CONTEXT_BEFORE = 200
CONTEXT_AFTER = 400

# Amount of text printed when no keyword matched at all.
FALLBACK_CHARS = 3000

# 1-based, inclusive page range that is always printed after keyword hits.
FIRST_PAGE = 38
LAST_PAGE = 41


def open_reader(pdf_path):
    """Return a ``PdfReader`` for *pdf_path* from the first usable library.

    Raises:
        ImportError: none of the libraries in ``READER_MODULES`` is installed.
        RuntimeError: at least one library was importable but none of them
            could open the file.
    """
    import_error = None
    open_failure = None
    for module_name in READER_MODULES:
        try:
            module = importlib.import_module(module_name)
        except ImportError as exc:
            import_error = exc
            continue
        try:
            return module.PdfReader(str(pdf_path))
        except Exception as exc:  # parse errors are library-specific
            open_failure = (module_name, exc)
    if open_failure is not None:
        # A library was available, so the problem is the file (or that
        # library), not a missing installation -- report it as such.
        module_name, exc = open_failure
        raise RuntimeError(
            '{} failed on {}: {}'.format(module_name, pdf_path, exc)
        ) from exc
    raise import_error


def extract_page_texts(pages):
    """Return the extracted text of each page, '' for pages that fail.

    Extraction is best-effort: a failing page is reported on stderr and
    contributes an empty string so the remaining pages are still searched.
    """
    texts = []
    for number, page in enumerate(pages, start=1):
        try:
            texts.append(page.extract_text() or '')
        except Exception as exc:
            print('WARNING: could not extract text from page', number, exc,
                  file=sys.stderr)
            texts.append('')
    return texts


def keyword_contexts(full, keywords, before=CONTEXT_BEFORE,
                     after=CONTEXT_AFTER):
    """Return ``(keyword, context)`` pairs for keywords that occur in *full*.

    Only the first case-insensitive occurrence of each keyword is reported.
    The context spans *before* characters ahead of the match start and
    *after* characters from the match start, clipped to the text bounds.
    """
    hits = []
    for keyword in keywords:
        # Search the original text rather than full.lower(): lowercasing can
        # change the string length, which would make the offset point at the
        # wrong place in ``full``.
        match = re.search(re.escape(keyword), full, re.IGNORECASE)
        if match is None:
            continue
        start = max(0, match.start() - before)
        end = min(len(full), match.start() + after)
        hits.append((keyword, full[start:end]))
    return hits


def print_pages(reader, first_page, last_page):
    """Print the text of pages *first_page*..*last_page* (1-based, inclusive).

    The range is clipped to the pages that actually exist; extraction errors
    are printed in place of the page text.
    """
    first_index = max(0, first_page - 1)
    stop_index = min(len(reader.pages), last_page)
    for index in range(first_index, stop_index):
        print(f'--- PAGE {index+1} ---\n')
        try:
            print(reader.pages[index].extract_text() or '')
        except Exception as exc:
            print('ERROR extracting page', index+1, exc)


def main(argv=None):
    """Run the extraction and return the process exit code.

    *argv* is the argument list without the program name; it defaults to
    ``sys.argv[1:]``.  Its optional first element overrides the PDF path.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    pdf_path = Path(args[0]) if args else DEFAULT_PDF

    if not pdf_path.exists():
        print('PDF not found at', pdf_path)
        return 2

    try:
        reader = open_reader(pdf_path)
    except ImportError as exc:
        print('No suitable PDF reader installed:', exc)
        return 3
    except RuntimeError as exc:
        print('Could not open PDF:', exc)
        return 3

    full = '\n'.join(extract_page_texts(reader.pages))
    hits = keyword_contexts(full, KEYWORDS)

    if not hits:
        # Fallback: show the start of the text for manual inspection.
        print('\n--- No keywords found; printing first '
              f'{FALLBACK_CHARS} chars of PDF text ---\n')
        print(full[:FALLBACK_CHARS])
        return 0

    for keyword, context in hits:
        print('\n--- context around "{}" ---\n'.format(keyword))
        print(context)

    # Also print the specific pages around the TOC entries for clarity.
    print(f'\n--- Explicitly printing pages {FIRST_PAGE}-{LAST_PAGE} ---\n')
    print_pages(reader, FIRST_PAGE, LAST_PAGE)
    return 0


if __name__ == '__main__':
    sys.exit(main())