372 lines
13 KiB
Python
372 lines
13 KiB
Python
# markdownconverter/gui/pdf_to_markdown.py
"""
PDF to Markdown converter tab.

Allows users to convert PDF files to Markdown format with optional image extraction.
"""
|
||
|
||
import os
|
||
import sys
|
||
import subprocess
|
||
import threading
|
||
import tkinter as tk
|
||
from pathlib import Path
|
||
from tkinter import filedialog, messagebox, StringVar, BooleanVar
|
||
import ttkbootstrap as tb
|
||
from tkinter.scrolledtext import ScrolledText
|
||
from ttkbootstrap.constants import *
|
||
|
||
from ..core.core import convert_pdf_to_markdown
|
||
from ..utils.logger import get_logger
|
||
from ..utils.error_handler import handle_conversion_error
|
||
|
||
# Module-level logger, obtained from the project's get_logger helper and
# keyed by this module's dotted name.
log = get_logger(__name__)
|
||
|
||
|
||
class PdfToMarkdownTab(tb.Frame):
    """Tab that converts a PDF file to Markdown.

    Image extraction is optional and controlled by a toggle in the UI.
    The conversion runs on a background thread so the GUI stays responsive;
    every widget update triggered by that thread is scheduled onto the Tk
    main loop through ``after``.
    """

    def __init__(self, parent):
        """Create the tab inside *parent* and build its widgets."""
        super().__init__(parent, padding=10)

        self.pdf_path = StringVar()
        self.output_path = StringVar()
        self.extract_images = BooleanVar(value=True)
        self.image_folder = StringVar()

        self._build_ui()

    def _build_ui(self):
        """Build the user interface of the PDF→Markdown tab."""

        # Source PDF selection
        input_frame = tb.Labelframe(self, text="File PDF Sorgente", padding=10)
        input_frame.pack(fill=tk.X, pady=(0, 10))
        input_frame.columnconfigure(1, weight=1)

        tb.Label(input_frame, text="File PDF:").grid(
            row=0, column=0, padx=5, pady=5, sticky="w"
        )
        tb.Entry(input_frame, textvariable=self.pdf_path).grid(
            row=0, column=1, padx=5, pady=5, sticky="ew"
        )
        tb.Button(
            input_frame,
            text="Sfoglia... 📁",
            command=self._choose_pdf,
            bootstyle=PRIMARY,
        ).grid(row=0, column=2, padx=5, pady=5)

        # Destination Markdown file
        output_frame = tb.Labelframe(
            self, text="File Markdown di Destinazione", padding=10
        )
        output_frame.pack(fill=tk.X, pady=(0, 10))
        output_frame.columnconfigure(1, weight=1)

        tb.Label(output_frame, text="File Markdown:").grid(
            row=0, column=0, padx=5, pady=5, sticky="w"
        )
        tb.Entry(output_frame, textvariable=self.output_path).grid(
            row=0, column=1, padx=5, pady=5, sticky="ew"
        )
        tb.Button(
            output_frame,
            text="Specifica... 🖊️",
            command=self._choose_output,
            bootstyle=SECONDARY,
        ).grid(row=0, column=2, padx=5, pady=5)
        tb.Button(
            output_frame,
            text="Open Folder 📂",
            command=self._open_output_folder,
            bootstyle=INFO,
        ).grid(row=0, column=3, padx=5, pady=5)

        # Conversion options
        options_frame = tb.Labelframe(self, text="Opzioni di Conversione", padding=10)
        options_frame.pack(fill=tk.X, pady=(0, 10))
        options_frame.columnconfigure(1, weight=1)

        # Toggle for image extraction
        tb.Checkbutton(
            options_frame,
            text="Estrai immagini dal PDF 🖼️",
            variable=self.extract_images,
            command=self._toggle_image_options,
            bootstyle="primary-round-toggle",
        ).grid(row=0, column=0, padx=5, pady=5, sticky="w")

        # Optional image folder; kept as attributes because their state is
        # toggled by the checkbox and while a conversion is running.
        tb.Label(options_frame, text="Cartella immagini:").grid(
            row=1, column=0, padx=5, pady=5, sticky="w"
        )
        self.image_folder_entry = tb.Entry(
            options_frame, textvariable=self.image_folder
        )
        self.image_folder_entry.grid(row=1, column=1, padx=5, pady=5, sticky="ew")

        self.image_folder_button = tb.Button(
            options_frame, text="Seleziona... 📁", command=self._choose_image_folder
        )
        self.image_folder_button.grid(row=1, column=2, padx=5, pady=5)

        tb.Label(
            options_frame,
            text="(Lascia vuoto per usare la cartella di default)",
            bootstyle=SECONDARY,
            font=("TkDefaultFont", 8),
        ).grid(row=2, column=1, padx=5, pady=(0, 5), sticky="w")

        # Info box
        info_frame = tb.Frame(self)
        info_frame.pack(fill=tk.X, pady=(0, 10))

        info_text = (
            "ℹ️ Nota: La conversione PDF→Markdown preserva la struttura del testo, "
            "i titoli e la formattazione di base (grassetto, corsivo). "
            "La qualità dipende dalla struttura del PDF originale."
        )
        tb.Label(
            info_frame, text=info_text, wraplength=800, bootstyle=INFO, padding=10
        ).pack(fill=tk.X)

        # Convert button; a reference is kept so it can be disabled while a
        # conversion is running (prevents launching two conversions at once).
        self.convert_button = tb.Button(
            self,
            text="Converti PDF → Markdown 🔁",
            command=self._convert,
            bootstyle=SUCCESS,
            width=30,
        )
        self.convert_button.pack(pady=10)

        # Progress bar
        progress_frame = tb.Frame(self)
        progress_frame.pack(fill=tk.X, pady=(0, 10))
        self.progress_var = tk.IntVar(value=0)
        self.progress = tb.Progressbar(
            progress_frame, bootstyle="info", variable=self.progress_var, length=400
        )
        self.progress.pack(fill=tk.X, padx=10)

        # Log area (read-only; _log temporarily unlocks it to append text)
        log_frame = tb.Labelframe(self, text="Log Conversione", padding=10)
        log_frame.pack(fill=tk.BOTH, expand=True)

        self.log_box = ScrolledText(
            log_frame, height=12, state="disabled", wrap=tk.WORD
        )
        self.log_box.pack(fill=tk.BOTH, expand=True)

        # Bring the image controls in line with the checkbox's initial value
        self._toggle_image_options()

    def _log(self, text):
        """Append *text* plus a newline to the log area and scroll to the end."""
        self.log_box.configure(state="normal")
        self.log_box.insert(tk.END, text + "\n")
        self.log_box.configure(state="disabled")
        self.log_box.see(tk.END)
        self.update_idletasks()

    def _set_ui_state(self, enabled: bool):
        """Enable or disable the controls that must not be used mid-conversion.

        The log box is deliberately left alone: it is always read-only and
        only ``_log`` unlocks it briefly. When re-enabling, the image-folder
        controls are restored according to the "extract images" checkbox
        rather than being forced to ``normal``.
        """
        try:
            self.convert_button.configure(state="normal" if enabled else "disabled")
            if enabled:
                self._toggle_image_options()
            else:
                self.image_folder_entry.configure(state="disabled")
                self.image_folder_button.configure(state="disabled")
        except tk.TclError:
            # Typically means the widgets were destroyed (tab/window closed).
            log.warning("Could not update widget state", exc_info=True)

    def _run_on_ui_thread(self, callback):
        """Schedule *callback* on the Tk main loop; safe to call from a worker.

        If scheduling fails the callback is dropped and the failure is logged:
        invoking widget code directly from the worker thread would not be safe.
        """
        try:
            self.after(0, callback)
        except (tk.TclError, RuntimeError):
            log.warning("Could not schedule a UI update", exc_info=True)

    def _on_progress(self, percent: int, message: str = ""):
        """Progress callback handed to the converter.

        Called from the background thread, so the actual widget update is
        marshalled to the main loop. It uses the same zero delay as the
        completion callbacks so a late progress update is not expected to
        land after the final progress reset.
        """

        def _update():
            try:
                self.progress_var.set(int(percent))
            except (tk.TclError, TypeError, ValueError):
                log.warning("Could not update the progress bar", exc_info=True)
            if message:
                self._log(f"{percent}% - {message}")

        self._run_on_ui_thread(_update)

    def _choose_pdf(self):
        """Open the dialog used to pick the source PDF file."""
        file = filedialog.askopenfilename(
            title="Seleziona il file PDF",
            filetypes=[("PDF Files", "*.pdf"), ("All Files", "*.*")],
        )
        if not file:
            return

        self.pdf_path.set(file)
        self._log(f"PDF selezionato: {os.path.basename(file)}")

        # Suggest "<pdf name>.md" next to the PDF, but never overwrite a
        # destination the user has already filled in.
        if not self.output_path.get():
            pdf_path = Path(file)
            self.output_path.set(str(pdf_path.parent / f"{pdf_path.stem}.md"))

    def _choose_output(self):
        """Open the dialog used to choose the output Markdown file."""
        file = filedialog.asksaveasfilename(
            title="Specifica il file Markdown di output",
            defaultextension=".md",
            filetypes=[("Markdown Files", "*.md"), ("All Files", "*.*")],
        )
        if file:
            self.output_path.set(file)
            self._log(f"Output impostato: {os.path.basename(file)}")

    def _toggle_image_options(self):
        """Enable the image-folder controls only when extraction is switched on."""
        state = "normal" if self.extract_images.get() else "disabled"
        self.image_folder_entry.configure(state=state)
        self.image_folder_button.configure(state=state)

    def _choose_image_folder(self):
        """Open the dialog used to pick the folder for extracted images."""
        folder = filedialog.askdirectory(title="Seleziona la cartella per le immagini")
        if folder:
            self.image_folder.set(folder)
            self._log(f"Cartella immagini: {folder}")

    def _convert(self):
        """Validate the inputs and start the PDF→Markdown conversion.

        Shows a warning/error dialog and returns early when the inputs are
        incomplete, the PDF does not exist, or the user declines to overwrite
        an existing output file. Otherwise the controls are disabled and the
        conversion is run on a daemon thread; the outcome is reported back on
        the main loop.
        """
        pdf_file = self.pdf_path.get().strip()
        output_file = self.output_path.get().strip()
        extract_imgs = self.extract_images.get()
        img_folder = self.image_folder.get().strip() or None

        # Input validation
        if not pdf_file:
            messagebox.showwarning("Attenzione", "Seleziona un file PDF da convertire.")
            return

        if not output_file:
            messagebox.showwarning(
                "Attenzione", "Specifica il percorso del file Markdown di output."
            )
            return

        if not os.path.exists(pdf_file):
            messagebox.showerror("Errore", f"Il file PDF non esiste:\n{pdf_file}")
            return

        # Ask before overwriting an existing file
        if os.path.exists(output_file) and not messagebox.askyesno(
            "Conferma Sovrascrittura",
            f"Il file esiste già:\n{output_file}\n\nSovrascrivere?",
        ):
            return

        separator = "=" * 60
        self._log(f"\n{separator}")
        self._log("INIZIO CONVERSIONE PDF → MARKDOWN")
        self._log(separator)
        self._log(f"File PDF: {os.path.basename(pdf_file)}")
        self._log(f"Output: {os.path.basename(output_file)}")
        self._log(f"Estrazione immagini: {'Sì' if extract_imgs else 'No'}")

        # We are on the main thread here: switch to the busy state right away
        # instead of asking the worker thread to schedule it.
        self._set_ui_state(False)

        def _run_conversion():
            # Only the conversion call is guarded, so a problem while
            # reporting success is never mistaken for a conversion failure.
            try:
                result_path = convert_pdf_to_markdown(
                    pdf_file,
                    output_file,
                    extract_images=extract_imgs,
                    image_folder=img_folder,
                    progress_callback=self._on_progress,
                )
            except Exception as exc:  # worker-thread boundary: report everything
                # The name bound by "except ... as" is unbound when the clause
                # ends, and the callback runs later on the main loop, so keep
                # the exception alive under an ordinary local name.
                error = exc
                self._run_on_ui_thread(lambda: self._on_conversion_error(error))
                return

            self._run_on_ui_thread(
                lambda: self._on_conversion_success(
                    result_path, output_file, extract_imgs, img_folder
                )
            )

        threading.Thread(target=_run_conversion, daemon=True).start()

    def _on_conversion_success(self, result_path, output_file, extract_imgs, img_folder):
        """Log and announce a finished conversion, then restore the UI."""
        try:
            self._log("\n✅ Conversione completata con successo!")
            self._log(f"File Markdown: {os.path.basename(result_path)}")

            if extract_imgs:
                if img_folder:
                    img_path = Path(img_folder)
                else:
                    # Same "<output stem>_images" location the original code
                    # inspected when no folder was chosen — presumably the
                    # converter's default; verify against convert_pdf_to_markdown.
                    out = Path(output_file)
                    img_path = out.parent / f"{out.stem}_images"
                if img_path.is_dir():
                    img_count = sum(1 for _ in img_path.glob("*.*"))
                    self._log(f"Immagini estratte: {img_count}")

            separator = "=" * 60
            self._log(separator)
            self._log("CONVERSIONE COMPLETATA")
            self._log(f"{separator}\n")

            try:
                messagebox.showinfo(
                    "Successo",
                    f"Conversione completata!\n\nFile Markdown creato:\n{os.path.basename(result_path)}",
                )
            except tk.TclError:
                log.warning("Could not show the completion dialog", exc_info=True)
        finally:
            self._reset_after_conversion()

    def _on_conversion_error(self, error):
        """Report a failed conversion through the shared handler, then restore the UI."""
        try:
            handle_conversion_error(error, log_callback=self._log, show_dialog=True)
        finally:
            self._reset_after_conversion()

    def _reset_after_conversion(self):
        """Re-enable the controls and reset the progress bar to zero."""
        self._set_ui_state(True)
        try:
            self.progress_var.set(0)
        except tk.TclError:
            log.warning("Could not reset the progress bar", exc_info=True)

    def _open_output_folder(self):
        """Open the folder containing the output Markdown file.

        Shows a warning when no output path is set and an error dialog when
        the folder does not exist or cannot be opened.
        """
        out = self.output_path.get().strip()
        if not out:
            messagebox.showwarning("Attenzione", "Nessun percorso di output impostato.")
            return

        # Path.parent is never falsy (a bare file name yields "."), so no
        # fallback to the path itself is needed.
        folder = Path(out).parent
        if not folder.is_dir():
            # Without this check the macOS/Linux launchers fail silently
            # because they are run with check=False.
            messagebox.showerror(
                "Errore", f"La cartella di output non esiste:\n{folder}"
            )
            return

        try:
            if sys.platform == "win32":
                os.startfile(folder)
            elif sys.platform == "darwin":
                subprocess.run(["open", str(folder)], check=False)
            else:
                subprocess.run(["xdg-open", str(folder)], check=False)
        except (OSError, ValueError) as e:
            # OSError also covers a missing "open"/"xdg-open" executable.
            log.error("Impossibile aprire la cartella di output: %s", e, exc_info=True)
            messagebox.showerror(
                "Errore", f"Impossibile aprire la cartella di output:\n{e}"
            )
|