update format
This commit is contained in:
parent
f3f2edae57
commit
4edfc33bdb
@ -1 +0,0 @@
|
|||||||
Luca Vallongo,Win11_Dev/admin,Win11_Dev,18.06.2025 08:03,file:///C:/Users/admin/AppData/Roaming/LibreOffice/4;
|
|
||||||
BIN
TemplateSumSample - versione con numeri su titoli.docx
Normal file
BIN
TemplateSumSample - versione con numeri su titoli.docx
Normal file
Binary file not shown.
Binary file not shown.
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"last_markdown_file": "C:/src/____GitProjects/cpp_python_debug/doc/Italian-manual.md",
|
"last_markdown_file": "C:/src/____GitProjects/radar_data_reader/doc/English-manual.md",
|
||||||
"last_selected_profile": "cpp_python_debug",
|
"last_selected_profile": "radar_data_reader",
|
||||||
"profiles": {
|
"profiles": {
|
||||||
"cpp_python_debug": {
|
"cpp_python_debug": {
|
||||||
"template_path": "C:/src/____GitProjects/cpp_python_debug/doc/TemplateSumSample.docx",
|
"template_path": "C:/src/____GitProjects/cpp_python_debug/doc/TemplateSumSample.docx",
|
||||||
@ -12,6 +12,28 @@
|
|||||||
"%%DOC_REV%%": "A2",
|
"%%DOC_REV%%": "A2",
|
||||||
"%%DOC_SECURITY%%": "INTERNAL"
|
"%%DOC_SECURITY%%": "INTERNAL"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"radar_data_reader": {
|
||||||
|
"template_path": "C:/src/____GitProjects/MarkdownConverter/TemplateSumSample.docx",
|
||||||
|
"values": {
|
||||||
|
"%%DOC_CUSTOMER%%": "INTERNAL",
|
||||||
|
"%%DOC_DATE%%": "22/07/2025",
|
||||||
|
"%%DOC_NUMBER%%": "90000002",
|
||||||
|
"%%DOC_PROJECT%%": "RADAR DATA READER",
|
||||||
|
"%%DOC_REV%%": "A",
|
||||||
|
"%%DOC_SECURITY%%": "INTERNAL"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"mark_converter": {
|
||||||
|
"template_path": "C:/src/____GitProjects/MarkdownConverter/TemplateSumSample.docx",
|
||||||
|
"values": {
|
||||||
|
"%%DOC_CUSTOMER%%": "INTERNO",
|
||||||
|
"%%DOC_DATE%%": "22/07/2025",
|
||||||
|
"%%DOC_NUMBER%%": "900000003",
|
||||||
|
"%%DOC_PROJECT%%": "MARK CONVERTER",
|
||||||
|
"%%DOC_REV%%": "A",
|
||||||
|
"%%DOC_SECURITY%%": "INTERNAL"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
BIN
doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx
Normal file
BIN
doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx
Normal file
Binary file not shown.
BIN
doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf
Normal file
BIN
doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf
Normal file
Binary file not shown.
@ -208,19 +208,27 @@ def _convert_to_pdf(markdown_text: str, output_file: str, add_toc: bool):
|
|||||||
log.info("Starting PDF conversion using pdfkit.")
|
log.info("Starting PDF conversion using pdfkit.")
|
||||||
if config is None:
|
if config is None:
|
||||||
raise FileNotFoundError("wkhtmltopdf executable not found. Cannot create PDF.")
|
raise FileNotFoundError("wkhtmltopdf executable not found. Cannot create PDF.")
|
||||||
|
|
||||||
title = _get_document_title(markdown_text)
|
title = _get_document_title(markdown_text)
|
||||||
content_without_title = markdown_text
|
content_without_title = markdown_text
|
||||||
match = re.search(r"^\s*#+\s+(.+)\n?", markdown_text, re.MULTILINE)
|
match = re.search(r"^\s*#+\s+(.+)\n?", markdown_text, re.MULTILINE)
|
||||||
if match:
|
if match:
|
||||||
content_without_title = markdown_text[match.end() :]
|
content_without_title = markdown_text[match.end() :]
|
||||||
md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables"])
|
|
||||||
|
# Previous code:
|
||||||
|
# md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables"])
|
||||||
|
# Enable the 'nl2br' extension so that single newlines in the Markdown source are rendered as <br> line breaks in the HTML:
|
||||||
|
md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables", "nl2br"])
|
||||||
|
|
||||||
html_body = md_converter.convert(content_without_title)
|
html_body = md_converter.convert(content_without_title)
|
||||||
toc_html = ""
|
toc_html = ""
|
||||||
if add_toc and hasattr(md_converter, "toc") and md_converter.toc:
|
if add_toc and hasattr(md_converter, "toc") and md_converter.toc:
|
||||||
log.info("Generating Table of Contents for PDF.")
|
log.info("Generating Table of Contents for PDF.")
|
||||||
toc_html = f"<h2>Table of Contents</h2>{md_converter.toc}<div style='page-break-after: always;'></div>"
|
toc_html = f"<h2>Table of Contents</h2>{md_converter.toc}<div style='page-break-after: always;'></div>"
|
||||||
|
|
||||||
full_html = f'<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><style>body{{font-family:sans-serif;}}h1,h2{{border-bottom:1px solid #eaecef;padding-bottom:.3em;}}</style></head><body><h1>{title}</h1>{toc_html}{html_body}</body></html>'
|
full_html = f'<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><style>body{{font-family:sans-serif;}}h1,h2{{border-bottom:1px solid #eaecef;padding-bottom:.3em;}}</style></head><body><h1>{title}</h1>{toc_html}{html_body}</body></html>'
|
||||||
pdf_options = {"encoding": "UTF-8", "enable-local-file-access": None}
|
pdf_options = {"encoding": "UTF-8", "enable-local-file-access": None}
|
||||||
|
|
||||||
pdfkit.from_string(
|
pdfkit.from_string(
|
||||||
full_html, output_file, configuration=config, options=pdf_options
|
full_html, output_file, configuration=config, options=pdf_options
|
||||||
)
|
)
|
||||||
@ -255,67 +263,99 @@ def _convert_to_docx(
|
|||||||
rev_history_md, main_content_md = _split_markdown_by_revision_history(markdown_text)
|
rev_history_md, main_content_md = _split_markdown_by_revision_history(markdown_text)
|
||||||
_add_revision_table(doc, rev_history_md)
|
_add_revision_table(doc, rev_history_md)
|
||||||
temp_files = []
|
temp_files = []
|
||||||
|
|
||||||
|
pandoc_format = "markdown+hard_line_breaks"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if main_content_md:
|
if main_content_md:
|
||||||
content_for_pandoc = main_content_md
|
content_for_pandoc = main_content_md
|
||||||
|
|
||||||
|
# Step 1: Remove the main H1 document title from the content to be processed.
|
||||||
|
# It's used for metadata, not for the main body's numbering.
|
||||||
match = re.search(r"^\s*#\s+(.+)\n?", content_for_pandoc, re.MULTILINE)
|
match = re.search(r"^\s*#\s+(.+)\n?", content_for_pandoc, re.MULTILINE)
|
||||||
if match:
|
if match:
|
||||||
log.info(
|
log.info("Removing main H1 title from content body.")
|
||||||
"Removing main title from content to exclude it from DOCX TOC."
|
|
||||||
)
|
|
||||||
content_for_pandoc = content_for_pandoc[match.end() :]
|
content_for_pandoc = content_for_pandoc[match.end() :]
|
||||||
log.info("Stripping manual numbering from headings.")
|
|
||||||
|
# Step 2: Strip any existing manual numbering from headings (e.g., "## 1. Title")
|
||||||
|
# to prevent double numbering when automatic numbering is applied.
|
||||||
|
log.info("Stripping manual numbering from headings for auto-numbering.")
|
||||||
content_for_pandoc = re.sub(
|
content_for_pandoc = re.sub(
|
||||||
r"^(\s*#+)\s+[0-9\.]+\s+",
|
r"^(\s*#+)\s+[0-9\.]+\s+",
|
||||||
r"\1 ",
|
r"\1 ",
|
||||||
content_for_pandoc,
|
content_for_pandoc,
|
||||||
flags=re.MULTILINE,
|
flags=re.MULTILINE,
|
||||||
)
|
)
|
||||||
pandoc_args = ["--shift-heading-level-by=-1"]
|
|
||||||
|
# Step 3: Configure Pandoc arguments for correct hierarchical numbering.
|
||||||
|
pandoc_args = [
|
||||||
|
# Enable automatic section numbering (e.g., 1, 1.1, 1.1.1).
|
||||||
|
"--number-sections",
|
||||||
|
# Shift heading levels up by one. This maps:
|
||||||
|
# ## (H2 in MD) -> Heading 1 in DOCX (numbered as 1, 2, ...)
|
||||||
|
# ### (H3 in MD) -> Heading 2 in DOCX (numbered as 1.1, 1.2, ...)
|
||||||
|
"--shift-heading-level-by=-1",
|
||||||
|
# Keep text left-aligned.
|
||||||
|
"--variable=justify:false",
|
||||||
|
]
|
||||||
|
|
||||||
if add_toc:
|
if add_toc:
|
||||||
pandoc_args.append("--toc")
|
pandoc_args.append("--toc")
|
||||||
log.info("Adding page break before Table of Contents.")
|
log.info("Adding page break before Table of Contents.")
|
||||||
toc_placeholder_p = _find_placeholder_paragraph(doc, "%%DOC_TOC%%")
|
toc_placeholder_p = _find_placeholder_paragraph(doc, "%%DOC_TOC%%")
|
||||||
toc_placeholder_p.insert_paragraph_before().add_run().add_break(
|
# Insert a page break before the TOC for better formatting.
|
||||||
WD_BREAK.PAGE
|
if toc_placeholder_p:
|
||||||
)
|
toc_placeholder_p.insert_paragraph_before().add_run().add_break(
|
||||||
|
WD_BREAK.PAGE
|
||||||
|
)
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(
|
||||||
delete=False, suffix=".docx"
|
delete=False, suffix=".docx"
|
||||||
) as temp_file:
|
) as temp_file:
|
||||||
pypandoc.convert_text(
|
pypandoc.convert_text(
|
||||||
content_for_pandoc,
|
content_for_pandoc,
|
||||||
"docx",
|
"docx",
|
||||||
format="md",
|
format=pandoc_format,
|
||||||
extra_args=pandoc_args,
|
extra_args=pandoc_args,
|
||||||
outputfile=temp_file.name,
|
outputfile=temp_file.name,
|
||||||
)
|
)
|
||||||
temp_files.append(temp_file.name)
|
temp_files.append(temp_file.name)
|
||||||
_insert_docx_at_paragraph(toc_placeholder_p, temp_file.name)
|
if toc_placeholder_p:
|
||||||
|
_insert_docx_at_paragraph(toc_placeholder_p, temp_file.name)
|
||||||
|
# The Pandoc output inserted at the TOC placeholder already contains the main content, so the separate content placeholder is no longer needed and is removed.
|
||||||
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%"))
|
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%"))
|
||||||
else:
|
else:
|
||||||
|
# If no TOC, just insert the content at its placeholder.
|
||||||
log.info("Adding page break before main content.")
|
log.info("Adding page break before main content.")
|
||||||
content_placeholder_p = _find_placeholder_paragraph(
|
content_placeholder_p = _find_placeholder_paragraph(
|
||||||
doc, "%%DOC_CONTENT%%"
|
doc, "%%DOC_CONTENT%%"
|
||||||
)
|
)
|
||||||
content_placeholder_p.insert_paragraph_before().add_run().add_break(
|
if content_placeholder_p:
|
||||||
WD_BREAK.PAGE
|
content_placeholder_p.insert_paragraph_before().add_run().add_break(
|
||||||
)
|
WD_BREAK.PAGE
|
||||||
|
)
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(
|
with tempfile.NamedTemporaryFile(
|
||||||
delete=False, suffix=".docx"
|
delete=False, suffix=".docx"
|
||||||
) as temp_file:
|
) as temp_file:
|
||||||
|
# '--toc' is deliberately not added to pandoc_args in this branch, because add_toc is false.
|
||||||
pypandoc.convert_text(
|
pypandoc.convert_text(
|
||||||
content_for_pandoc,
|
content_for_pandoc,
|
||||||
"docx",
|
"docx",
|
||||||
format="md",
|
format=pandoc_format,
|
||||||
extra_args=pandoc_args,
|
extra_args=pandoc_args,
|
||||||
outputfile=temp_file.name,
|
outputfile=temp_file.name,
|
||||||
)
|
)
|
||||||
temp_files.append(temp_file.name)
|
temp_files.append(temp_file.name)
|
||||||
_insert_docx_at_paragraph(content_placeholder_p, temp_file.name)
|
if content_placeholder_p:
|
||||||
|
_insert_docx_at_paragraph(content_placeholder_p, temp_file.name)
|
||||||
|
# TOC placeholder is not used, so remove it.
|
||||||
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%"))
|
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%"))
|
||||||
else:
|
else:
|
||||||
|
# If there is no main content, remove both placeholders.
|
||||||
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%"))
|
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%"))
|
||||||
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%"))
|
_remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%"))
|
||||||
|
|
||||||
doc.save(output_file)
|
doc.save(output_file)
|
||||||
log.info(f"Document successfully created at {output_file}")
|
log.info(f"Document successfully created at {output_file}")
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@ -149,13 +149,19 @@ class MarkdownConverterApp:
|
|||||||
self.dynamic_entry_vars = {}
|
self.dynamic_entry_vars = {}
|
||||||
self.loaded_profile_name = ""
|
self.loaded_profile_name = ""
|
||||||
self.selected_file = StringVar(value=self.config.get(KEY_LAST_MARKDOWN, ""))
|
self.selected_file = StringVar(value=self.config.get(KEY_LAST_MARKDOWN, ""))
|
||||||
self.active_profile_name = StringVar() # Inizializzata vuota
|
|
||||||
|
# Keep the output paths in sync with the selected Markdown file.
|
||||||
|
# Add a trace to the StringVar. Whenever its value is written,
|
||||||
|
# the _on_selected_file_change callback will be executed.
|
||||||
|
self.selected_file.trace_add("write", self._on_selected_file_change)
|
||||||
|
|
||||||
|
self.active_profile_name = StringVar() # Initialized empty
|
||||||
self.add_toc_var = BooleanVar(value=True)
|
self.add_toc_var = BooleanVar(value=True)
|
||||||
self.docx_output_path = StringVar()
|
self.docx_output_path = StringVar()
|
||||||
self.pdf_direct_output_path = StringVar()
|
self.pdf_direct_output_path = StringVar()
|
||||||
self.pdf_from_docx_output_path = StringVar()
|
self.pdf_from_docx_output_path = StringVar()
|
||||||
self._build_ui()
|
self._build_ui()
|
||||||
self.update_profile_combobox() # Questo imposterà active_profile_name
|
self.update_profile_combobox() # This will set active_profile_name
|
||||||
self.root.protocol("WM_DELETE_WINDOW", self._on_closing)
|
self.root.protocol("WM_DELETE_WINDOW", self._on_closing)
|
||||||
|
|
||||||
def _setup_logging(self):
|
def _setup_logging(self):
|
||||||
@ -345,6 +351,13 @@ class MarkdownConverterApp:
|
|||||||
shutdown_logging_system()
|
shutdown_logging_system()
|
||||||
self.root.destroy()
|
self.root.destroy()
|
||||||
|
|
||||||
|
def _on_selected_file_change(self, *args):
|
||||||
|
"""
|
||||||
|
Callback function that is triggered whenever the self.selected_file
|
||||||
|
StringVar is written to. It ensures the output paths are updated.
|
||||||
|
"""
|
||||||
|
self._update_output_paths()
|
||||||
|
|
||||||
def run_app():
|
def run_app():
|
||||||
root = tb.Window(themename="sandstone")
|
root = tb.Window(themename="sandstone")
|
||||||
app = MarkdownConverterApp(root)
|
app = MarkdownConverterApp(root)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user