diff --git a/.~lock.TemplateSumSample.docx# b/.~lock.TemplateSumSample.docx# deleted file mode 100644 index ccbbcdd..0000000 --- a/.~lock.TemplateSumSample.docx# +++ /dev/null @@ -1 +0,0 @@ -Luca Vallongo,Win11_Dev/admin,Win11_Dev,18.06.2025 08:03,file:///C:/Users/admin/AppData/Roaming/LibreOffice/4; \ No newline at end of file diff --git a/TemplateSumSample - versione con numeri su titoli.docx b/TemplateSumSample - versione con numeri su titoli.docx new file mode 100644 index 0000000..1a06bcc Binary files /dev/null and b/TemplateSumSample - versione con numeri su titoli.docx differ diff --git a/TemplateSumSample.docx b/TemplateSumSample.docx index 1a06bcc..f1c9d9d 100644 Binary files a/TemplateSumSample.docx and b/TemplateSumSample.docx differ diff --git a/config/app_config.json b/config/app_config.json index 8118f1b..f0736c5 100644 --- a/config/app_config.json +++ b/config/app_config.json @@ -1,6 +1,6 @@ { - "last_markdown_file": "C:/src/____GitProjects/cpp_python_debug/doc/Italian-manual.md", - "last_selected_profile": "cpp_python_debug", + "last_markdown_file": "C:/src/____GitProjects/radar_data_reader/doc/English-manual.md", + "last_selected_profile": "radar_data_reader", "profiles": { "cpp_python_debug": { "template_path": "C:/src/____GitProjects/cpp_python_debug/doc/TemplateSumSample.docx", @@ -12,6 +12,28 @@ "%%DOC_REV%%": "A2", "%%DOC_SECURITY%%": "INTERNAL" } + }, + "radar_data_reader": { + "template_path": "C:/src/____GitProjects/MarkdownConverter/TemplateSumSample.docx", + "values": { + "%%DOC_CUSTOMER%%": "INTERNAL", + "%%DOC_DATE%%": "22/07/2025", + "%%DOC_NUMBER%%": "90000002", + "%%DOC_PROJECT%%": "RADAR DATA READER", + "%%DOC_REV%%": "A", + "%%DOC_SECURITY%%": "INTERNAL" + } + }, + "mark_converter": { + "template_path": "C:/src/____GitProjects/MarkdownConverter/TemplateSumSample.docx", + "values": { + "%%DOC_CUSTOMER%%": "INTERNO", + "%%DOC_DATE%%": "22/07/2025", + "%%DOC_NUMBER%%": "900000003", + "%%DOC_PROJECT%%": "MARK CONVERTER", + "%%DOC_REV%%": "A", + "%%DOC_SECURITY%%": "INTERNAL" + } } } } \ No newline at end of file diff --git a/doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx b/doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx new file mode 100644 index 0000000..d1cf5d2 Binary files /dev/null and b/doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx differ diff --git a/doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf b/doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf new file mode 100644 index 0000000..cde25e9 Binary files /dev/null and b/doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf differ diff --git a/markdownconverter/core/core.py b/markdownconverter/core/core.py index c8f845f..0c60ac2 100644 --- a/markdownconverter/core/core.py +++ b/markdownconverter/core/core.py @@ -208,19 +208,27 @@ def _convert_to_pdf(markdown_text: str, output_file: str, add_toc: bool): log.info("Starting PDF conversion using pdfkit.") if config is None: raise FileNotFoundError("wkhtmltopdf executable not found. Cannot create PDF.") + title = _get_document_title(markdown_text) content_without_title = markdown_text match = re.search(r"^\s*#+\s+(.+)\n?", markdown_text, re.MULTILINE) if match: content_without_title = markdown_text[match.end() :] - md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables"]) + + # Previous code: + # md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables"]) + # New code with 'nl2br' extension: + md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables", "nl2br"]) + html_body = md_converter.convert(content_without_title) toc_html = "" if add_toc and hasattr(md_converter, "toc") and md_converter.toc: log.info("Generating Table of Contents for PDF.") toc_html = f"

Table of Contents

{md_converter.toc}
" + full_html = f'{title}

{title}

{toc_html}{html_body}' pdf_options = {"encoding": "UTF-8", "enable-local-file-access": None} + pdfkit.from_string( full_html, output_file, configuration=config, options=pdf_options ) @@ -255,67 +263,99 @@ def _convert_to_docx( rev_history_md, main_content_md = _split_markdown_by_revision_history(markdown_text) _add_revision_table(doc, rev_history_md) temp_files = [] + + pandoc_format = "markdown+hard_line_breaks" + try: if main_content_md: content_for_pandoc = main_content_md + + # Step 1: Remove the main H1 document title from the content to be processed. + # It's used for metadata, not for the main body's numbering. match = re.search(r"^\s*#\s+(.+)\n?", content_for_pandoc, re.MULTILINE) if match: - log.info( - "Removing main title from content to exclude it from DOCX TOC." - ) + log.info("Removing main H1 title from content body.") content_for_pandoc = content_for_pandoc[match.end() :] - log.info("Stripping manual numbering from headings.") + + # Step 2: Strip any existing manual numbering from headings (e.g., "## 1. Title") + # to prevent double numbering when automatic numbering is applied. + log.info("Stripping manual numbering from headings for auto-numbering.") content_for_pandoc = re.sub( r"^(\s*#+)\s+[0-9\.]+\s+", r"\1 ", content_for_pandoc, flags=re.MULTILINE, ) - pandoc_args = ["--shift-heading-level-by=-1"] + + # Step 3: Configure Pandoc arguments for correct hierarchical numbering. + pandoc_args = [ + # Enable automatic section numbering (e.g., 1, 1.1, 1.1.1). + "--number-sections", + # Shift heading levels up by one. This maps: + # ## (H2 in MD) -> Heading 1 in DOCX (numbered as 1, 2, ...) + # ### (H3 in MD) -> Heading 2 in DOCX (numbered as 1.1, 1.2, ...) + "--shift-heading-level-by=-1", + # Keep text left-aligned. + "--variable=justify:false", + ] + if add_toc: pandoc_args.append("--toc") log.info("Adding page break before Table of Contents.") toc_placeholder_p = _find_placeholder_paragraph(doc, "%%DOC_TOC%%") - toc_placeholder_p.insert_paragraph_before().add_run().add_break( - WD_BREAK.PAGE - ) + # Insert a page break before the TOC for better formatting. + if toc_placeholder_p: + toc_placeholder_p.insert_paragraph_before().add_run().add_break( + WD_BREAK.PAGE + ) + with tempfile.NamedTemporaryFile( delete=False, suffix=".docx" ) as temp_file: pypandoc.convert_text( content_for_pandoc, "docx", - format="md", + format=pandoc_format, extra_args=pandoc_args, outputfile=temp_file.name, ) temp_files.append(temp_file.name) - _insert_docx_at_paragraph(toc_placeholder_p, temp_file.name) + if toc_placeholder_p: + _insert_docx_at_paragraph(toc_placeholder_p, temp_file.name) + # The main content is now part of the generated TOC doc, so remove the placeholder. _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%")) else: + # If no TOC, just insert the content at its placeholder. log.info("Adding page break before main content.") content_placeholder_p = _find_placeholder_paragraph( doc, "%%DOC_CONTENT%%" ) - content_placeholder_p.insert_paragraph_before().add_run().add_break( - WD_BREAK.PAGE - ) + if content_placeholder_p: + content_placeholder_p.insert_paragraph_before().add_run().add_break( + WD_BREAK.PAGE + ) + with tempfile.NamedTemporaryFile( delete=False, suffix=".docx" ) as temp_file: + # We don't add '--toc' to pandoc_args here. pypandoc.convert_text( content_for_pandoc, "docx", - format="md", + format=pandoc_format, extra_args=pandoc_args, outputfile=temp_file.name, ) temp_files.append(temp_file.name) - _insert_docx_at_paragraph(content_placeholder_p, temp_file.name) + if content_placeholder_p: + _insert_docx_at_paragraph(content_placeholder_p, temp_file.name) + # TOC placeholder is not used, so remove it. _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%")) else: + # If there is no main content, remove both placeholders. _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%")) _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%")) + doc.save(output_file) log.info(f"Document successfully created at {output_file}") finally: diff --git a/markdownconverter/gui/gui.py b/markdownconverter/gui/gui.py index bc6fc6d..6e707fd 100644 --- a/markdownconverter/gui/gui.py +++ b/markdownconverter/gui/gui.py @@ -149,13 +149,19 @@ class MarkdownConverterApp: self.dynamic_entry_vars = {} self.loaded_profile_name = "" self.selected_file = StringVar(value=self.config.get(KEY_LAST_MARKDOWN, "")) - self.active_profile_name = StringVar() # Inizializzata vuota + + # --- NEW CODE --- + # Add a trace to the StringVar. Whenever its value is written, + # the _on_selected_file_change callback will be executed. + self.selected_file.trace_add("write", self._on_selected_file_change) + + self.active_profile_name = StringVar() # Initialized empty self.add_toc_var = BooleanVar(value=True) self.docx_output_path = StringVar() self.pdf_direct_output_path = StringVar() self.pdf_from_docx_output_path = StringVar() self._build_ui() - self.update_profile_combobox() # Questo imposterà active_profile_name + self.update_profile_combobox() # This will set active_profile_name self.root.protocol("WM_DELETE_WINDOW", self._on_closing) def _setup_logging(self): @@ -345,6 +351,13 @@ class MarkdownConverterApp: shutdown_logging_system() self.root.destroy() + def _on_selected_file_change(self, *args): + """ + Callback function that is triggered whenever the self.selected_file + StringVar is written to. It ensures the output paths are updated. + """ + self._update_output_paths() + def run_app(): root = tb.Window(themename="sandstone") app = MarkdownConverterApp(root)