add toc to pdf

This commit is contained in:
VALLONGOL 2025-06-11 09:12:34 +02:00
parent 643506513e
commit bdf52039c1
3 changed files with 169 additions and 37 deletions

BIN
CFM001-T-IT-I en-it.dotx Normal file

Binary file not shown.

View File

@ -1,31 +1,113 @@
import os
import re
import markdown
import pdfkit
import pypandoc
# Path to the wkhtmltopdf executable
WKHTMLTOPDF_PATH = r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe"
config = pdfkit.configuration(wkhtmltopdf=WKHTMLTOPDF_PATH)
TEMPLATE_DOCX_PATH = os.path.join(os.path.dirname(__file__), "..", "templates", "default_template.docx")
# Default path for the DOCX template
TEMPLATE_DOCX_PATH = os.path.join(
os.path.dirname(__file__), "..", "templates", "default_template.docx"
)
def convert_markdown(input_file, output_format, font=None, template_path=None):
output_file = os.path.splitext(input_file)[0] + (".pdf" if output_format == "PDF" else ".docx")
def _get_document_title(markdown_text):
"""Extracts the first header (any level) from markdown text to use as a title."""
for line in markdown_text.splitlines():
if re.match(r'^#+\s', line.strip()):
return re.sub(r'^#+\s*', '', line.strip())
return "Document"
def _extract_title_and_separate_content(markdown_text):
"""
Extracts the first header (any level) and returns it with the rest of the content.
"""
lines = markdown_text.splitlines()
title = "Document"
content_lines = []
title_found = False
for line in lines:
if not title_found and re.match(r'^#+\s', line.strip()):
title = re.sub(r'^#+\s*', '', line.strip())
title_found = True
else:
content_lines.append(line)
content_without_title = "\n".join(content_lines)
return title, content_without_title
def convert_markdown(input_file, output_format, add_toc=False, font=None, template_path=None):
    """
    Converts a Markdown file to the specified output format (PDF or DOCX).

    Args:
        input_file: Path of the Markdown file to convert.
        output_format: "PDF" or "DOCX".
        add_toc: When true, a table of contents is added. For PDF the first
            header is pulled out as the document title and the TOC is placed
            on its own leading page; for DOCX pandoc's ``--toc`` is used.
        font: Optional font family applied to the PDF body (unused for DOCX).
        template_path: Optional reference document for DOCX output. When it
            is empty or does not exist, TEMPLATE_DOCX_PATH is used if present.

    Returns:
        The path of the generated file: the input path with its extension
        replaced by ``.pdf`` or ``.docx``.

    Raises:
        FileNotFoundError: If ``input_file`` does not exist.
        ValueError: If ``output_format`` is neither "PDF" nor "DOCX".
    """
    # Local import so the block does not depend on a new file-level import.
    from html import escape

    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input file not found: {input_file}")

    output_file = os.path.splitext(input_file)[0] + (
        ".pdf" if output_format == "PDF" else ".docx"
    )

    if output_format == "PDF":
        # Read the file exactly once; a second read() on the same handle
        # would return an empty string.
        with open(input_file, 'r', encoding='utf-8') as f:
            markdown_text = f.read()

        extensions = ['toc'] if add_toc else []
        md = markdown.Markdown(extensions=extensions)

        body_markdown = markdown_text
        title = _get_document_title(markdown_text)
        if add_toc:
            # The title is rendered on the TOC page, so remove it from the body.
            title, body_markdown = _extract_title_and_separate_content(markdown_text)

        html_body = md.convert(body_markdown)

        # The title comes from user text; escape it before embedding in HTML.
        safe_title = escape(title)

        if font:
            style = (
                f"<style>body {{ font-family: '{font}'; }} "
                ".page-break { page-break-after: always; }</style>"
            )
        else:
            style = "<style>.page-break { page-break-after: always; }</style>"

        toc_html = ""
        if add_toc and hasattr(md, 'toc'):
            toc_html = f"""
            <h1>{safe_title}</h1>
            <h2>Table of Contents</h2>
            {md.toc}
            <div class="page-break"></div>
            """

        full_html = (
            '<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">'
            f"{style}<title>{safe_title}</title></head>"
            f"<body>{toc_html}{html_body}</body></html>\n"
        )

        options = {'encoding': "UTF-8"}
        pdfkit.from_string(full_html, output_file, configuration=config, options=options)

    elif output_format == "DOCX":
        args = ["--standalone"]
        if add_toc:
            # Let pandoc handle title detection and TOC generation automatically.
            args.append("--toc")

        if template_path and os.path.exists(template_path):
            args.extend(["--reference-doc", template_path])
        elif os.path.exists(TEMPLATE_DOCX_PATH):
            args.extend(["--reference-doc", TEMPLATE_DOCX_PATH])

        # Convert once, from the original, unmodified input file.
        pypandoc.convert_file(
            input_file,
            'docx',
            outputfile=output_file,
            extra_args=args,
            encoding='utf-8'
        )

    else:
        raise ValueError("Unsupported format")

    return output_file

View File

@ -3,16 +3,18 @@ import subprocess
import json
import ttkbootstrap as tb
from ttkbootstrap.constants import *
from tkinter import filedialog, messagebox, StringVar
from tkinter import filedialog, messagebox, StringVar, BooleanVar
from ..core.core import convert_markdown
CONFIG_FILE = os.path.join(os.path.expanduser("~"), ".markdown_converter_config.json")
def save_config(font_name):
    """Write the given font name to the JSON file at CONFIG_FILE under the "font" key."""
    # This function seems duplicated with config.py, consider centralizing
    settings = {"font": font_name}
    with open(CONFIG_FILE, "w", encoding="utf-8") as config_handle:
        json.dump(settings, config_handle)
def load_config():
# This function seems duplicated with config.py, consider centralizing
if os.path.exists(CONFIG_FILE):
with open(CONFIG_FILE, "r", encoding="utf-8") as f:
data = json.load(f)
@ -20,73 +22,121 @@ def load_config():
return ""
def open_with_default_app(filepath):
    """Opens a file with the default registered application.

    Shows a warning dialog when ``filepath`` is empty, and an error dialog
    when the file is missing or cannot be opened. Nothing is returned.
    """
    if not filepath:
        messagebox.showwarning("Warning", "No output file has been generated yet.")
        return
    try:
        # Launch the file exactly once. os.startfile is Windows-specific.
        os.startfile(filepath)
    except FileNotFoundError:
        messagebox.showerror("Error", f"File not found:\n{filepath}")
    except Exception as e:
        # UI boundary: report any other failure to the user instead of crashing.
        messagebox.showerror("Error", f"Could not open the file:\n{str(e)}")
def open_output_folder(filepath):
    """Opens the folder containing the specified file.

    Shows a warning dialog when ``filepath`` is empty and an error dialog
    when the folder cannot be opened. Nothing is returned.
    """
    if not filepath:
        messagebox.showwarning("Warning", "No output file has been generated yet.")
        return
    try:
        # Resolve to an absolute path first: dirname() of a bare filename is
        # "", which cannot be opened.
        folder = os.path.dirname(os.path.abspath(filepath))
        os.startfile(folder)  # Windows-specific
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        messagebox.showerror("Error", f"Could not open the folder:\n{str(e)}")
def run_app():
    """Initializes and runs the main application window.

    Builds the window (input file, PDF font, DOCX template, table-of-contents
    toggle and action buttons), wires the callbacks, and blocks in the Tk
    main loop until the window is closed.
    """
    app = tb.Window(themename="sandstone")
    app.title("Markdown Converter")
    app.geometry("680x280")  # Increased height for the new widget
    app.resizable(False, False)

    # --- Variables ---
    selected_file = StringVar()
    selected_font = StringVar(value=load_config())
    selected_template = StringVar()
    output_path = StringVar()
    add_toc_var = BooleanVar(value=True)  # Variable for the TOC checkbox

    # --- Functions ---
    def browse_markdown():
        """Opens a file dialog to select a Markdown file."""
        path = filedialog.askopenfilename(
            title="Select a Markdown file",
            filetypes=[("Markdown files", "*.md"), ("All files", "*.*")]
        )
        if path:
            selected_file.set(path)

    def browse_template():
        """Opens a file dialog to select a DOCX or DOTX template file."""
        path = filedialog.askopenfilename(
            title="Select a template file",
            filetypes=[
                ("Word Documents", "*.docx"),
                ("Word Templates", "*.dotx"),
                ("All files", "*.*")
            ]
        )
        if path:
            selected_template.set(path)

    def convert(fmt):
        """Handles the conversion process when a button is clicked."""
        file_path = selected_file.get()
        font = selected_font.get()
        template = selected_template.get()
        add_toc = add_toc_var.get()

        if not file_path:
            messagebox.showerror("Error", "Please select a Markdown file.")
            return

        try:
            # Keyword arguments keep font/template from sliding into the
            # add_toc position.
            output = convert_markdown(
                file_path,
                fmt,
                add_toc=add_toc,
                font=font,
                template_path=template,
            )
            output_path.set(output)
            save_config(font)  # Save font on successful conversion
            messagebox.showinfo("Success", f"File converted successfully:\n{output}")
        except Exception as e:
            # UI boundary: surface any conversion failure in a dialog.
            messagebox.showerror("Error", f"An error occurred during conversion:\n{str(e)}")

    # --- Layout ---
    # All widgets live in main_frame. Nothing is gridded directly on `app`,
    # because main_frame is packed into it and Tk does not allow mixing
    # pack and grid in the same container.
    main_frame = tb.Frame(app, padding=10)
    main_frame.pack(fill=BOTH, expand=True)

    # Row 0: Input file
    tb.Label(main_frame, text="Markdown File:").grid(row=0, column=0, padx=(0, 10), pady=5, sticky="w")
    tb.Entry(main_frame, textvariable=selected_file, width=60).grid(row=0, column=1, padx=5, sticky="ew")
    tb.Button(main_frame, text="Browse...", command=browse_markdown, bootstyle=PRIMARY).grid(row=0, column=2, padx=5)

    # Row 1: Font (for PDF)
    tb.Label(main_frame, text="Font (PDF):").grid(row=1, column=0, padx=(0, 10), pady=5, sticky="w")
    fonts = ["Arial", "Times New Roman", "Verdana", "Calibri", "Courier New"]
    tb.Combobox(main_frame, values=fonts, textvariable=selected_font, width=30, bootstyle=INFO).grid(row=1, column=1, sticky="w", padx=5)

    # Row 2: Template (for DOCX)
    tb.Label(main_frame, text="Template (DOCX):").grid(row=2, column=0, padx=(0, 10), pady=5, sticky="w")
    tb.Entry(main_frame, textvariable=selected_template, width=60).grid(row=2, column=1, padx=5, sticky="ew")
    tb.Button(main_frame, text="Browse...", command=browse_template, bootstyle=SECONDARY).grid(row=2, column=2, padx=5)

    # Row 3: Options (TOC)
    tb.Checkbutton(
        main_frame,
        text="Add Table of Contents at the beginning",
        variable=add_toc_var,
        bootstyle="primary-round-toggle"
    ).grid(row=3, column=1, pady=10, sticky="w")

    # Row 4: Action Buttons
    action_frame = tb.Frame(main_frame)
    action_frame.grid(row=4, column=0, columnspan=3, pady=10)

    tb.Button(action_frame, text="Convert to PDF", command=lambda: convert("PDF"), bootstyle=SUCCESS).pack(side=LEFT, padx=5)
    tb.Button(action_frame, text="Convert to DOCX", command=lambda: convert("DOCX"), bootstyle=SUCCESS).pack(side=LEFT, padx=5)
    tb.Button(action_frame, text="Open File", command=lambda: open_with_default_app(output_path.get()), bootstyle=WARNING).pack(side=LEFT, padx=(20, 5))
    tb.Button(action_frame, text="Open Folder", command=lambda: open_output_folder(output_path.get()), bootstyle=WARNING).pack(side=LEFT, padx=5)

    # Let the entry column absorb any extra horizontal space.
    main_frame.columnconfigure(1, weight=1)

    app.mainloop()