update format

2025-07-22 14:48:55 +02:00 · 2025-07-22 14:48:55 +02:00 · 4edfc33bdb
commit 4edfc33bdb
parent f3f2edae57
8 changed files with 95 additions and 21 deletions
--- a/.~lock.TemplateSumSample.docx#
+++ b/.~lock.TemplateSumSample.docx#
@ -1 +0,0 @@
 Luca Vallongo,Win11_Dev/admin,Win11_Dev,18.06.2025 08:03,file:///C:/Users/admin/AppData/Roaming/LibreOffice/4;
--- a/titoli.docx
+++ b/titoli.docx
--- a/TemplateSumSample.docx
+++ b/TemplateSumSample.docx
--- a/config/app_config.json
+++ b/config/app_config.json
@ -1,6 +1,6 @@
 {
-    "last_markdown_file": "C:/src/____GitProjects/cpp_python_debug/doc/Italian-manual.md",
+    "last_markdown_file": "C:/src/____GitProjects/radar_data_reader/doc/English-manual.md",
-    "last_selected_profile": "cpp_python_debug",
+    "last_selected_profile": "radar_data_reader",
    "profiles": {
        "cpp_python_debug": {
            "template_path": "C:/src/____GitProjects/cpp_python_debug/doc/TemplateSumSample.docx",
@ -12,6 +12,28 @@
                "%%DOC_REV%%": "A2",
                "%%DOC_SECURITY%%": "INTERNAL"
            }
        },
        "radar_data_reader": {
            "template_path": "C:/src/____GitProjects/MarkdownConverter/TemplateSumSample.docx",
            "values": {
                "%%DOC_CUSTOMER%%": "INTERNAL",
                "%%DOC_DATE%%": "22/07/2025",
                "%%DOC_NUMBER%%": "90000002",
                "%%DOC_PROJECT%%": "RADAR DATA READER",
                "%%DOC_REV%%": "A",
                "%%DOC_SECURITY%%": "INTERNAL"
            }
        },
        "mark_converter": {
            "template_path": "C:/src/____GitProjects/MarkdownConverter/TemplateSumSample.docx",
            "values": {
                "%%DOC_CUSTOMER%%": "INTERNO",
                "%%DOC_DATE%%": "22/07/2025",
                "%%DOC_NUMBER%%": "900000003",
                "%%DOC_PROJECT%%": "MARK CONVERTER",
                "%%DOC_REV%%": "A",
                "%%DOC_SECURITY%%": "INTERNAL"
            }
        }
    }
 }
--- a/doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx
+++ b/doc/MARK_CONVERTER_SUM_900000003_A_20250722.docx
--- a/doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf
+++ b/doc/MARK_CONVERTER_SUM_900000003_A_20250722.pdf
--- a/markdownconverter/core/core.py
+++ b/markdownconverter/core/core.py
@ -208,19 +208,27 @@ def _convert_to_pdf(markdown_text: str, output_file: str, add_toc: bool):
    log.info("Starting PDF conversion using pdfkit.")
    if config is None:
        raise FileNotFoundError("wkhtmltopdf executable not found. Cannot create PDF.")
    title = _get_document_title(markdown_text)
    content_without_title = markdown_text
    match = re.search(r"^\s*#+\s+(.+)\n?", markdown_text, re.MULTILINE)
    if match:
        content_without_title = markdown_text[match.end() :]
-    md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables"])
+
    # Previous code:
    # md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables"])
    # New code with 'nl2br' extension:
    md_converter = markdown.Markdown(extensions=["toc", "fenced_code", "tables", "nl2br"])
    html_body = md_converter.convert(content_without_title)
    toc_html = ""
    if add_toc and hasattr(md_converter, "toc") and md_converter.toc:
        log.info("Generating Table of Contents for PDF.")
        toc_html = f"<h2>Table of Contents</h2>{md_converter.toc}<div style='page-break-after: always;'></div>"
    full_html = f'<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><title>{title}</title><style>body{{font-family:sans-serif;}}h1,h2{{border-bottom:1px solid #eaecef;padding-bottom:.3em;}}</style></head><body><h1>{title}</h1>{toc_html}{html_body}</body></html>'
    pdf_options = {"encoding": "UTF-8", "enable-local-file-access": None}
    pdfkit.from_string(
        full_html, output_file, configuration=config, options=pdf_options
    )
@ -255,67 +263,99 @@ def _convert_to_docx(
    rev_history_md, main_content_md = _split_markdown_by_revision_history(markdown_text)
    _add_revision_table(doc, rev_history_md)
    temp_files = []
    pandoc_format = "markdown+hard_line_breaks"
    try:
        if main_content_md:
            content_for_pandoc = main_content_md
            # Step 1: Remove the main H1 document title from the content to be processed.
            # It's used for metadata, not for the main body's numbering.
            match = re.search(r"^\s*#\s+(.+)\n?", content_for_pandoc, re.MULTILINE)
            if match:
-                log.info(
+                log.info("Removing main H1 title from content body.")
                    "Removing main title from content to exclude it from DOCX TOC."
                )
                content_for_pandoc = content_for_pandoc[match.end() :]
-            log.info("Stripping manual numbering from headings.")
+
            # Step 2: Strip any existing manual numbering from headings (e.g., "## 1. Title")
            # to prevent double numbering when automatic numbering is applied.
            log.info("Stripping manual numbering from headings for auto-numbering.")
            content_for_pandoc = re.sub(
                r"^(\s*#+)\s+[0-9\.]+\s+",
                r"\1 ",
                content_for_pandoc,
                flags=re.MULTILINE,
            )
-            pandoc_args = ["--shift-heading-level-by=-1"]
+            
            # Step 3: Configure Pandoc arguments for correct hierarchical numbering.
            pandoc_args = [
                # Enable automatic section numbering (e.g., 1, 1.1, 1.1.1).
                "--number-sections",
                # Shift heading levels up by one. This maps:
                # ## (H2 in MD) -> Heading 1 in DOCX (numbered as 1, 2, ...)
                # ### (H3 in MD) -> Heading 2 in DOCX (numbered as 1.1, 1.2, ...)
                "--shift-heading-level-by=-1",
                # Keep text left-aligned.
                "--variable=justify:false",
            ]
            if add_toc:
                pandoc_args.append("--toc")
                log.info("Adding page break before Table of Contents.")
                toc_placeholder_p = _find_placeholder_paragraph(doc, "%%DOC_TOC%%")
-                toc_placeholder_p.insert_paragraph_before().add_run().add_break(
+                # Insert a page break before the TOC for better formatting.
-                    WD_BREAK.PAGE
+                if toc_placeholder_p:
-                )
+                    toc_placeholder_p.insert_paragraph_before().add_run().add_break(
                        WD_BREAK.PAGE
                    )
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".docx"
                ) as temp_file:
                    pypandoc.convert_text(
                        content_for_pandoc,
                        "docx",
-                        format="md",
+                        format=pandoc_format,
                        extra_args=pandoc_args,
                        outputfile=temp_file.name,
                    )
                    temp_files.append(temp_file.name)
-                    _insert_docx_at_paragraph(toc_placeholder_p, temp_file.name)
+                    if toc_placeholder_p:
                        _insert_docx_at_paragraph(toc_placeholder_p, temp_file.name)
                # The main content is now part of the generated TOC doc, so remove the placeholder.
                _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%"))
            else:
                # If no TOC, just insert the content at its placeholder.
                log.info("Adding page break before main content.")
                content_placeholder_p = _find_placeholder_paragraph(
                    doc, "%%DOC_CONTENT%%"
                )
-                content_placeholder_p.insert_paragraph_before().add_run().add_break(
+                if content_placeholder_p:
-                    WD_BREAK.PAGE
+                    content_placeholder_p.insert_paragraph_before().add_run().add_break(
-                )
+                        WD_BREAK.PAGE
                    )
                with tempfile.NamedTemporaryFile(
                    delete=False, suffix=".docx"
                ) as temp_file:
                    # We don't add '--toc' to pandoc_args here.
                    pypandoc.convert_text(
                        content_for_pandoc,
                        "docx",
-                        format="md",
+                        format=pandoc_format,
                        extra_args=pandoc_args,
                        outputfile=temp_file.name,
                    )
                    temp_files.append(temp_file.name)
-                    _insert_docx_at_paragraph(content_placeholder_p, temp_file.name)
+                    if content_placeholder_p:
                        _insert_docx_at_paragraph(content_placeholder_p, temp_file.name)
                # TOC placeholder is not used, so remove it.
                _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%"))
        else:
            # If there is no main content, remove both placeholders.
            _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_TOC%%"))
            _remove_paragraph(_find_placeholder_paragraph(doc, "%%DOC_CONTENT%%"))
        doc.save(output_file)
        log.info(f"Document successfully created at {output_file}")
    finally:
--- a/markdownconverter/gui/gui.py
+++ b/markdownconverter/gui/gui.py
@ -149,13 +149,19 @@ class MarkdownConverterApp:
        self.dynamic_entry_vars = {}
        self.loaded_profile_name = ""
        self.selected_file = StringVar(value=self.config.get(KEY_LAST_MARKDOWN, ""))
-        self.active_profile_name = StringVar() # Inizializzata vuota
+        
        # --- NEW CODE ---
        # Add a trace to the StringVar. Whenever its value is written,
        # the _on_selected_file_change callback will be executed.
        self.selected_file.trace_add("write", self._on_selected_file_change)
        self.active_profile_name = StringVar() # Initialized empty
        self.add_toc_var = BooleanVar(value=True)
        self.docx_output_path = StringVar()
        self.pdf_direct_output_path = StringVar()
        self.pdf_from_docx_output_path = StringVar()
        self._build_ui()
-        self.update_profile_combobox() # Questo imposterà active_profile_name
+        self.update_profile_combobox() # This will set active_profile_name
        self.root.protocol("WM_DELETE_WINDOW", self._on_closing)
    def _setup_logging(self):
@ -345,6 +351,13 @@ class MarkdownConverterApp:
        shutdown_logging_system()
        self.root.destroy()
    def _on_selected_file_change(self, *args):
        """Refresh the output paths after ``self.selected_file`` is written.

        Registered in ``__init__`` as a "write" trace on the
        ``self.selected_file`` StringVar, so it runs on every write to
        that variable.

        Args:
            *args: Positional arguments passed by the trace mechanism.
                They are accepted only so the signature matches the
                callback contract and are otherwise ignored.
        """
        # All the work is delegated; this method only adapts the trace
        # callback signature to the argument-less path updater.
        self._update_output_paths()
 def run_app():
    root = tb.Window(themename="sandstone")
    app = MarkdownConverterApp(root)
		`@ -1 +0,0 @@`
			`Luca Vallongo,Win11_Dev/admin,Win11_Dev,18.06.2025 08:03,file:///C:/Users/admin/AppData/Roaming/LibreOffice/4;`