"""Manual smoke test: convert one PDF to Markdown and print a short report.

Usage:
    python <this_script> [PDF_PATH]

With no argument the script converts ``DEFAULT_PDF``. The Markdown file is
written next to the PDF (same name, ``.md`` suffix) and images are extracted
into a sibling ``<pdf stem>_images`` folder. The process exits with status 0
on success and 1 if the conversion or the report raises.
"""

import sys
import traceback
from itertools import islice
from pathlib import Path
from typing import Optional, Sequence

# Ensure project root is on sys.path so package imports work when running
# this script directly.
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

from markdownconverter.core.core import convert_pdf_to_markdown  # noqa: E402

# Document converted when no path is given on the command line.
DEFAULT_PDF = Path(
    r"c:\src\____GitProjects\SXXXXXXX_MarkdownConverter\doc\ICD_DECD_FTH - GRIFO-F_TH, Data Exchange Control Document for, rev -A, Draft 2.pdf"
)

# For testing, limit to the first pages to speed up debugging.
PAGE_LIMIT = 5

# Number of leading Markdown lines echoed to the console after conversion.
PREVIEW_LINES = 200


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the conversion and print a preview plus the extracted-image count.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. If a first argument is present it is used as
            the PDF path instead of ``DEFAULT_PDF``.

    Returns:
        0 when the conversion and report complete, 1 when any exception is
        raised along the way (the traceback is printed before returning).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    pdf = Path(args[0]) if args else DEFAULT_PDF

    md_out = pdf.with_suffix(".md")
    img_folder = pdf.parent / f"{pdf.stem}_images"

    print("Input PDF:", pdf)
    print("Output MD:", md_out)
    print("Image folder:", img_folder)

    try:
        # The return value is used below as the path of the written file.
        result = convert_pdf_to_markdown(
            str(pdf),
            str(md_out),
            extract_images=True,
            image_folder=str(img_folder),
            page_limit=PAGE_LIMIT,
        )
        print("\nConversion completed. Markdown saved to:", result)

        # Print preview: read only the lines that are shown instead of
        # loading the whole file into memory.
        with open(result, "r", encoding="utf-8") as f:
            preview = "".join(islice(f, PREVIEW_LINES))
        print(f"\n--- Markdown preview (first {PREVIEW_LINES} lines) ---\n")
        print(preview)

        # Count images: every entry directly inside the image folder.
        if img_folder.exists():
            imgs = list(img_folder.glob("*"))
            print(f"\nImages extracted: {len(imgs)}")
        else:
            print("\nImages extracted: 0")
    except Exception as e:
        # Top-level boundary of a debug script: report everything, then
        # signal the failure through the exit status.
        print("Conversion failed:", e)
        traceback.print_exc()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())