# markdownconverter/gui/pdf_to_markdown.py
"""
PDF to Markdown converter tab.

Allows users to convert PDF files to Markdown format with optional image
extraction.
"""

import os
import subprocess
import sys
import threading
import tkinter as tk
from pathlib import Path
from tkinter import BooleanVar, StringVar, filedialog, messagebox
from tkinter.scrolledtext import ScrolledText

import ttkbootstrap as tb
from ttkbootstrap.constants import *  # provides PRIMARY, SECONDARY, INFO, SUCCESS

from ..core.core import convert_pdf_to_markdown
from ..utils.error_handler import handle_conversion_error
from ..utils.logger import get_logger

log = get_logger(__name__)

# Horizontal rule used to delimit one conversion run in the log area.
_SEPARATOR = "=" * 60


class PdfToMarkdownTab(tb.Frame):
    """
    Tab for converting a PDF file to Markdown.

    Supports optional extraction of the images contained in the PDF. The
    conversion itself runs in a daemon thread; every widget update is
    scheduled back onto the Tk event loop through ``after``.
    """

    def __init__(self, parent):
        """Create the tab inside ``parent`` and build all of its widgets."""
        super().__init__(parent, padding=10)
        self.pdf_path = StringVar()
        self.output_path = StringVar()
        self.extract_images = BooleanVar(value=True)
        self.image_folder = StringVar()
        self._build_ui()

    def _build_ui(self):
        """Build the user interface of the PDF→Markdown tab."""
        # Source PDF selection
        input_frame = tb.Labelframe(self, text="File PDF Sorgente", padding=10)
        input_frame.pack(fill=tk.X, pady=(0, 10))
        input_frame.columnconfigure(1, weight=1)

        tb.Label(input_frame, text="File PDF:").grid(
            row=0, column=0, padx=5, pady=5, sticky="w"
        )
        tb.Entry(input_frame, textvariable=self.pdf_path).grid(
            row=0, column=1, padx=5, pady=5, sticky="ew"
        )
        tb.Button(
            input_frame,
            text="Sfoglia... 📁",
            command=self._choose_pdf,
            bootstyle=PRIMARY,
        ).grid(row=0, column=2, padx=5, pady=5)

        # Destination Markdown file
        output_frame = tb.Labelframe(
            self, text="File Markdown di Destinazione", padding=10
        )
        output_frame.pack(fill=tk.X, pady=(0, 10))
        output_frame.columnconfigure(1, weight=1)

        tb.Label(output_frame, text="File Markdown:").grid(
            row=0, column=0, padx=5, pady=5, sticky="w"
        )
        tb.Entry(output_frame, textvariable=self.output_path).grid(
            row=0, column=1, padx=5, pady=5, sticky="ew"
        )
        tb.Button(
            output_frame,
            text="Specifica... 🖊️",
            command=self._choose_output,
            bootstyle=SECONDARY,
        ).grid(row=0, column=2, padx=5, pady=5)
        tb.Button(
            output_frame,
            text="Open Folder 📂",
            command=self._open_output_folder,
            bootstyle=INFO,
        ).grid(row=0, column=3, padx=5, pady=5)

        # Conversion options
        options_frame = tb.Labelframe(self, text="Opzioni di Conversione", padding=10)
        options_frame.pack(fill=tk.X, pady=(0, 10))
        options_frame.columnconfigure(1, weight=1)

        # Toggle for image extraction
        tb.Checkbutton(
            options_frame,
            text="Estrai immagini dal PDF 🖼️",
            variable=self.extract_images,
            command=self._toggle_image_options,
            bootstyle="primary-round-toggle",
        ).grid(row=0, column=0, padx=5, pady=5, sticky="w")

        # Optional image folder
        tb.Label(options_frame, text="Cartella immagini:").grid(
            row=1, column=0, padx=5, pady=5, sticky="w"
        )
        self.image_folder_entry = tb.Entry(
            options_frame, textvariable=self.image_folder
        )
        self.image_folder_entry.grid(row=1, column=1, padx=5, pady=5, sticky="ew")
        self.image_folder_button = tb.Button(
            options_frame, text="Seleziona... 📁", command=self._choose_image_folder
        )
        self.image_folder_button.grid(row=1, column=2, padx=5, pady=5)

        tb.Label(
            options_frame,
            text="(Lascia vuoto per usare la cartella di default)",
            bootstyle=SECONDARY,
            font=("TkDefaultFont", 8),
        ).grid(row=2, column=1, padx=5, pady=(0, 5), sticky="w")

        # Info box
        info_frame = tb.Frame(self)
        info_frame.pack(fill=tk.X, pady=(0, 10))

        info_text = (
            "ℹ️ Nota: La conversione PDF→Markdown preserva la struttura del testo, "
            "i titoli e la formattazione di base (grassetto, corsivo). "
            "La qualità dipende dalla struttura del PDF originale."
        )
        tb.Label(
            info_frame, text=info_text, wraplength=800, bootstyle=INFO, padding=10
        ).pack(fill=tk.X)

        # Convert button; kept as an attribute so it can be disabled while a
        # conversion is running.
        self.convert_button = tb.Button(
            self,
            text="Converti PDF → Markdown 🔁",
            command=self._convert,
            bootstyle=SUCCESS,
            width=30,
        )
        self.convert_button.pack(pady=10)

        # Progress bar
        progress_frame = tb.Frame(self)
        progress_frame.pack(fill=tk.X, pady=(0, 10))
        self.progress_var = tk.IntVar(value=0)
        self.progress = tb.Progressbar(
            progress_frame, bootstyle="info", variable=self.progress_var, length=400
        )
        self.progress.pack(fill=tk.X, padx=10)

        # Log area (read-only; _log unlocks it only while inserting text)
        log_frame = tb.Labelframe(self, text="Log Conversione", padding=10)
        log_frame.pack(fill=tk.BOTH, expand=True)

        self.log_box = ScrolledText(
            log_frame, height=12, state="disabled", wrap=tk.WORD
        )
        self.log_box.pack(fill=tk.BOTH, expand=True)

        # Bring the image widgets in line with the initial checkbox value.
        self._toggle_image_options()

    def _log(self, text):
        """Append ``text`` plus a newline to the log area and scroll to the end."""
        self.log_box.configure(state="normal")
        self.log_box.insert(tk.END, text + "\n")
        self.log_box.configure(state="disabled")
        self.log_box.see(tk.END)
        self.update_idletasks()

    def _set_ui_state(self, enabled: bool):
        """
        Enable or disable the controls that must not be used mid-conversion.

        When disabling, the convert button and the image-folder widgets are
        greyed out. When enabling, the image-folder widgets are restored
        according to the "extract images" toggle instead of being forced on.
        The log box is deliberately left alone so it stays read-only.
        """
        try:
            self.convert_button.configure(
                state="normal" if enabled else "disabled"
            )
            if enabled:
                self._toggle_image_options()
            else:
                self.image_folder_entry.configure(state="disabled")
                self.image_folder_button.configure(state="disabled")
        except tk.TclError:
            # Raised by Tk when a widget can no longer be configured.
            log.debug("Could not update widget state", exc_info=True)

    def _run_on_ui_thread(self, callback, delay_ms: int = 0):
        """
        Schedule ``callback`` (no arguments) on the Tk event loop.

        Used by the worker thread so that widget code is not executed from
        that thread. If scheduling fails the callback is dropped and the
        failure is logged.
        """
        try:
            self.after(delay_ms, callback)
        except (tk.TclError, RuntimeError) as exc:
            log.warning("Could not schedule UI update: %s", exc)

    def _on_progress(self, percent: int, message: str = ""):
        """
        Progress callback handed to ``convert_pdf_to_markdown``.

        Updates the progress bar and, when ``message`` is non-empty, appends
        "<percent>% - <message>" to the log. It is called from the worker
        thread, so the actual update is scheduled on the event loop.
        """

        def _apply():
            try:
                self.progress_var.set(int(percent))
            except (TypeError, ValueError, tk.TclError):
                log.debug("Ignoring invalid progress value %r", percent)
            if message:
                self._log(f"{percent}% - {message}")

        self._run_on_ui_thread(_apply)

    def _choose_pdf(self):
        """Open the dialog used to select the source PDF file."""
        file = filedialog.askopenfilename(
            title="Seleziona il file PDF",
            filetypes=[("PDF Files", "*.pdf"), ("All Files", "*.*")],
        )
        if not file:
            return

        self.pdf_path.set(file)
        self._log(f"PDF selezionato: {os.path.basename(file)}")

        # Suggest "<pdf name>.md" next to the PDF unless an output is already set.
        if not self.output_path.get():
            self.output_path.set(str(Path(file).with_suffix(".md")))

    def _choose_output(self):
        """Open the dialog used to pick the output Markdown file."""
        file = filedialog.asksaveasfilename(
            title="Specifica il file Markdown di output",
            defaultextension=".md",
            filetypes=[("Markdown Files", "*.md"), ("All Files", "*.*")],
        )
        if file:
            self.output_path.set(file)
            self._log(f"Output impostato: {os.path.basename(file)}")

    def _toggle_image_options(self):
        """Enable the image-folder widgets only while image extraction is on."""
        state = "normal" if self.extract_images.get() else "disabled"
        self.image_folder_entry.configure(state=state)
        self.image_folder_button.configure(state=state)

    def _choose_image_folder(self):
        """Open the dialog used to select the folder for extracted images."""
        folder = filedialog.askdirectory(title="Seleziona la cartella per le immagini")
        if folder:
            self.image_folder.set(folder)
            self._log(f"Cartella immagini: {folder}")

    def _convert(self):
        """
        Validate the inputs and start the PDF→Markdown conversion.

        Shows a warning/error dialog and returns early when the PDF or the
        output path is missing, when the PDF does not exist, or when the user
        declines to overwrite an existing output file. Otherwise the controls
        are disabled and the conversion runs in a daemon thread; the result is
        reported back on the event loop.
        """
        pdf_file = self.pdf_path.get().strip()
        output_file = self.output_path.get().strip()
        extract_imgs = self.extract_images.get()
        img_folder = self.image_folder.get().strip() or None

        # Input validation
        if not pdf_file:
            messagebox.showwarning("Attenzione", "Seleziona un file PDF da convertire.")
            return

        if not output_file:
            messagebox.showwarning(
                "Attenzione", "Specifica il percorso del file Markdown di output."
            )
            return

        if not os.path.exists(pdf_file):
            messagebox.showerror("Errore", f"Il file PDF non esiste:\n{pdf_file}")
            return

        # Ask before overwriting an existing output file.
        if os.path.exists(output_file) and not messagebox.askyesno(
            "Conferma Sovrascrittura",
            f"Il file esiste già:\n{output_file}\n\nSovrascrivere?",
        ):
            return

        self._log(f"\n{_SEPARATOR}")
        self._log("INIZIO CONVERSIONE PDF → MARKDOWN")
        self._log(_SEPARATOR)
        self._log(f"File PDF: {os.path.basename(pdf_file)}")
        self._log(f"Output: {os.path.basename(output_file)}")
        self._log(f"Estrazione immagini: {'Sì' if extract_imgs else 'No'}")

        # Still on the event-loop thread here: switch to the busy state before
        # the worker starts so a second click cannot launch another run.
        self._set_ui_state(False)

        def _run_conversion():
            try:
                result_path = convert_pdf_to_markdown(
                    pdf_file,
                    output_file,
                    extract_images=extract_imgs,
                    image_folder=img_folder,
                    progress_callback=self._on_progress,
                )
            except Exception as exc:
                # Bind the exception as a default argument: the name bound by
                # "except ... as" is deleted when the clause ends, so a plain
                # closure would find it unbound by the time Tk runs it.
                self._run_on_ui_thread(lambda err=exc: self._report_failure(err))
            else:
                self._run_on_ui_thread(
                    lambda: self._report_success(
                        result_path, output_file, extract_imgs, img_folder
                    )
                )

        threading.Thread(target=_run_conversion, daemon=True).start()

    def _report_success(self, result_path, output_file, extract_imgs, img_folder):
        """Log the outcome of a successful conversion, notify the user, reset the UI."""
        try:
            self._log("\n✅ Conversione completata con successo!")
            self._log(f"File Markdown: {os.path.basename(result_path)}")

            if extract_imgs:
                if img_folder:
                    img_path = Path(img_folder)
                else:
                    # No folder chosen: look in "<output stem>_images" next to
                    # the output file.
                    out = Path(output_file)
                    img_path = out.parent / f"{out.stem}_images"
                if img_path.exists():
                    img_count = sum(1 for _ in img_path.glob("*.*"))
                    self._log(f"Immagini estratte: {img_count}")

            self._log(_SEPARATOR)
            self._log("CONVERSIONE COMPLETATA")
            self._log(f"{_SEPARATOR}\n")

            try:
                messagebox.showinfo(
                    "Successo",
                    "Conversione completata!\n\nFile Markdown creato:\n"
                    f"{os.path.basename(result_path)}",
                )
            except tk.TclError:
                # The dialog is best-effort; the log already records the result.
                log.debug("Could not show the success dialog", exc_info=True)
        finally:
            self._reset_after_conversion()

    def _report_failure(self, error):
        """Hand ``error`` to the shared conversion error handler, then reset the UI."""
        try:
            handle_conversion_error(error, log_callback=self._log, show_dialog=True)
        finally:
            self._reset_after_conversion()

    def _reset_after_conversion(self):
        """Re-enable the controls and set the progress bar back to zero."""
        self._set_ui_state(True)
        try:
            self.progress_var.set(0)
        except tk.TclError:
            log.debug("Could not reset the progress bar", exc_info=True)

    def _open_output_folder(self):
        """
        Open the folder that contains the output Markdown file.

        Shows a warning when no output path is set or when that folder does
        not exist, and an error dialog when the platform's file manager
        cannot be launched.
        """
        out = self.output_path.get().strip()
        if not out:
            messagebox.showwarning("Attenzione", "Nessun percorso di output impostato.")
            return

        folder = Path(out).parent
        if not folder.is_dir():
            messagebox.showwarning(
                "Attenzione", f"La cartella di output non esiste:\n{folder}"
            )
            return

        try:
            if sys.platform == "win32":
                os.startfile(str(folder))
            elif sys.platform == "darwin":
                subprocess.run(["open", str(folder)], check=False)
            else:
                subprocess.run(["xdg-open", str(folder)], check=False)
        except Exception as e:
            log.error(f"Impossibile aprire la cartella di output: {e}", exc_info=True)
            messagebox.showerror(
                "Errore", f"Impossibile aprire la cartella di output:\n{e}"
            )