SXXXXXXX_ProjectUtility/projectutility/gui/process_worker.py

# projectutility/gui/process_worker.py

import subprocess
import threading
import queue
import logging
import os
import sys  # To check platform for startupinfo adjustments
from typing import Dict, Any, List, Optional # Added Optional

# --- Import Core Models ---
# Use absolute import from the main package 'projectutility'
try:
    from projectutility.core.models import ToolInfo, ToolParameter
    MODELS_IMPORTED = True
except ImportError as e:
    # This is critical. Log the error. MainWindow should ideally prevent
    # instantiation of ProcessWorker if core modules failed to load.
    logging.getLogger(__name__).critical(
        f"Failed to import core models (ToolInfo, ToolParameter): {e}. "
        f"ProcessWorker cannot function correctly.",
        exc_info=True
    )
    MODELS_IMPORTED = False
    # Define dummy classes to potentially allow the rest of the file to be parsed
    # without NameErrors, although runtime execution will fail.
    from collections import namedtuple
    ToolInfo = namedtuple("ToolInfo", ["id", "display_name", "description", "command", "working_dir", "parameters", "version", "has_gui"])
    ToolParameter = namedtuple("ToolParameter", ["name", "label", "type", "required", "default", "description", "options"])


class ProcessWorker:
    """
    Executes a tool's command in a separate thread using subprocess.

    It captures the standard output (stdout) and standard error (stderr)
    streams of the executed process and sends messages containing this output,
    along with status updates, back to the main GUI thread via a shared queue.
    This prevents blocking the GUI during potentially long-running tool executions.
    """

    def __init__(
        self,
        run_id: str,
        tool_info: ToolInfo,
        parameters: Dict[str, Any],
        output_queue: queue.Queue,
    ) -> None:
        """
        Initializes the ProcessWorker for a specific tool execution.

        Args:
            run_id: A unique identifier for this execution instance (e.g., tool_id + timestamp).
            tool_info: The ToolInfo object containing details about the tool to run
                       (command, working directory, parameter definitions, etc.).
            parameters: A dictionary containing the parameter values provided by the user
                        through the GUI (keys should match ToolParameter names).
            output_queue: The queue.Queue instance used to communicate back to the
                          main GUI thread (MainWindow). Messages are dictionaries.
        """
        self.run_id = run_id
        self.tool_info = tool_info
        self.parameters = parameters
        self.output_queue = output_queue
        # Create a logger specific to this worker instance using the run_id
        self.logger = logging.getLogger(f"{__name__}.{self.run_id}")
        self.logger.info(f"Initializing worker for tool: '{self.tool_info.display_name}'")

        # Internal state variables for managing the subprocess and threads
        self._process: Optional[subprocess.Popen] = None # Holds the Popen object once started
        self._stdout_thread: Optional[threading.Thread] = None # Thread for reading stdout
        self._stderr_thread: Optional[threading.Thread] = None # Thread for reading stderr
        self._stop_event = threading.Event() # Event to signal termination request (optional use)

        if not MODELS_IMPORTED:
             self.logger.error("ProcessWorker initialized but core models failed to import. Execution will likely fail.")
             # Optionally raise an exception here to prevent faulty execution?
             # raise RuntimeError("ProcessWorker cannot operate without core models.")

    def _build_command_list(self) -> List[str]:
        """
        Constructs the full command list to be executed by subprocess.

        Starts with the base command from tool_info.command and appends
        arguments based on the provided parameters, following a simple
        convention (e.g., --param_name value for most types, --flag for booleans).

        Returns:
            A list of strings representing the command and its arguments.

        Raises:
            ValueError: If a required parameter is missing a value (although the GUI
                        should ideally prevent this). Can be modified to just log a warning.
        """
        # Start with a copy of the base command list from ToolInfo
        # ToolInfo.command should already contain resolved absolute paths for scripts if needed.
        command: List[str] = list(self.tool_info.command)

        self.logger.debug(f"Base command from ToolInfo: {command}")
        self.logger.debug(f"Received parameters for execution: {self.parameters}")

        # Iterate through the parameter definitions specified in ToolInfo
        # to correctly format them for the command line.
        for param_def in self.tool_info.parameters:
            param_name: str = param_def.name
            param_type: str = param_def.type.lower() # Ensure type comparison is case-insensitive
            # Get the value provided by the user, falling back to the default if not provided
            value: Any = self.parameters.get(param_name, param_def.default)

            self.logger.debug(f"Processing parameter '{param_name}': Type='{param_type}', Required={param_def.required}, Value='{value}' (Type: {type(value).__name__})")

            # --- Parameter Handling Logic ---

            # 1. Handle missing required parameters
            if param_def.required and value is None:
                # This indicates an issue, as the GUI should enforce required params.
                # Option 1: Raise an error to stop execution immediately.
                # Option 2: Log an error and let the tool potentially fail. (Current approach)
                self.logger.error(
                    f"Required parameter '{param_name}' is missing a value (is None). "
                    f"This should have been caught by the GUI. The tool might fail."
                )
                # If choosing Option 1, uncomment the line below:
                # raise ValueError(f"Required parameter '{param_name}' is missing a value.")
                continue # Skip adding this parameter to the command line

            # 2. Skip optional parameters that have no value (are None)
            # We explicitly check for None, as values like False, 0, or "" might be valid.
            if not param_def.required and value is None:
                self.logger.debug(
                    f"Skipping optional parameter '{param_name}' because its value is None."
                )
                continue

            # 3. Format and append the parameter based on type

            # Boolean parameters: Append flag only if True
            if param_type == "boolean":
                # Convert value to boolean robustly
                is_true = str(value).lower() in ["true", "1", "yes", "on"] if isinstance(value, str) else bool(value)
                if is_true:
                    command.append(f"--{param_name}")
                    self.logger.debug(f"Appending boolean flag: --{param_name}")
                else:
                     self.logger.debug(f"Skipping boolean flag for '{param_name}' as its value is False.")

            # Other parameter types (string, integer, float, file, folder, etc.):
            # Append --parameter_name followed by the string representation of the value.
            # We already handled the case where value is None for optional params.
            # If value is None for a required param, we logged an error earlier.
            # If value is not None here, we add it.
            else:
                 # Ensure we have a non-None value before adding --name and value
                 if value is not None:
                     command.append(f"--{param_name}")
                     # Convert the value to string for the command line
                     # Path parameters (file/folder) should already be strings from the GUI.
                     command.append(str(value))
                     # Log value being added (be mindful of sensitive data if logging in production)
                     # Potentially truncate long values in logs:
                     # value_str = str(value)
                     # logged_value = value_str[:100] + '...' if len(value_str) > 100 else value_str
                     logged_value = str(value)
                     self.logger.debug(f"Appending parameter: --{param_name} {logged_value}")
                 else:
                      # This case should theoretically not be reached due to earlier checks
                      self.logger.warning(
                          f"Parameter '{param_name}' unexpectedly has None value at the "
                          f"appending stage. Skipping."
                      )

        self.logger.info(f"Final command list constructed: {command}")
        return command

    def _stream_reader(self, stream, stream_type: str) -> None:
        """
        Reads lines from a given stream (stdout or stderr) and puts them onto the output queue.

        This function runs in a separate thread for each stream. It reads line by line
        until the stream is closed (process terminates) or an error occurs.

        Args:
            stream: The stream object to read from (e.g., process.stdout).
            stream_type: A string indicating the type of stream ("stdout" or "stderr"),
                         used for tagging messages sent to the queue.
        """
        self.logger.debug(f"Stream reader thread started for: {stream_type}")
        try:
            # Use iter(stream.readline, '') which efficiently reads lines
            # until EOF is reached (readline() returns an empty string).
            # This works correctly for text-mode streams.
            for line in iter(stream.readline, ""):
                # Optional: Check if termination was requested externally
                # if self._stop_event.is_set():
                #     self.logger.info(f"Stop event detected, terminating {stream_type} reader.")
                #     break

                if line: # Ensure the line is not empty
                    # Strip trailing newline? No, send the raw line usually.
                    # Let the receiver handle formatting if needed.
                    # self.logger.debug(f"Read {stream_type}: {line.rstrip()}") # Example log
                    self.output_queue.put(
                        {
                            "type": stream_type, # "stdout" or "stderr"
                            "run_id": self.run_id,
                            "data": line, # Send the raw line data, including newline
                        }
                    )
                # else: # readline() returned empty string, meaning EOF
                #     break # Exit the loop cleanly on EOF

            self.logger.debug(f"{stream_type} stream reading finished (EOF or thread stop).")

        except ValueError as e:
            # This exception can occur if the stream is closed unexpectedly while reading,
            # often happening if the process is forcefully terminated.
            if "I/O operation on closed file" in str(e):
                 self.logger.warning(
                     f"ValueError reading {stream_type} stream: Stream was likely closed "
                     f"by process termination. ({e})"
                 )
            else:
                 self.logger.error(
                     f"ValueError reading {stream_type} stream: {e}", exc_info=True
                 )

        except Exception as e:
            # Catch any other unexpected errors during stream reading
            self.logger.exception(f"An unexpected error occurred in the {stream_type} stream reader thread.")
            # Try to send an error message back to the GUI if possible
            try:
                self.output_queue.put(
                    {
                        "type": "stderr", # Report reader errors as stderr
                        "run_id": self.run_id,
                        "data": f"*** ProjectUtility Error: Failed reading {stream_type} stream: {e} ***\n",
                    }
                )
            except Exception as q_err:
                 self.logger.error(f"Failed to send stream reader error to queue: {q_err}")

        finally:
            # Clean up: Ensure the stream associated with this reader is closed
            # Although Popen should manage this when the process ends,
            # closing it here defensively doesn't hurt.
            try:
                if stream and not stream.closed:
                    stream.close()
                    self.logger.debug(f"{stream_type} stream explicitly closed by reader thread.")
            except Exception as close_err:
                # Ignore errors during close, as the stream might already be closed
                self.logger.debug(f"Ignoring error while closing {stream_type} stream: {close_err}")


    def run(self) -> None:
        """
        Executes the tool's command in a subprocess and manages output streaming threads.

        This method is intended to be the target of the worker thread created by MainWindow.
        It performs the following steps:
        1. Builds the command list using _build_command_list().
        2. Configures subprocess startup info (e.g., hide console on Windows if needed).
        3. Starts the subprocess using subprocess.Popen.
        4. Starts separate threads (_stdout_thread, _stderr_thread) to read output streams.
        5. Waits for the subprocess to complete using process.wait().
        6. Waits for the stream reader threads to finish.
        7. Sends a final "finished" message to the output queue with the exit code.
        """
        self.logger.info(f"Worker run method started. Preparing to execute command.")
        final_exit_code: Optional[int] = None # Use None to indicate potential failure before getting code

        if not MODELS_IMPORTED:
             self.logger.error("Cannot run process: Core models were not imported successfully.")
             # Send error message and finish immediately
             self.output_queue.put({
                 "type": "stderr",
                 "run_id": self.run_id,
                 "data": "*** ProjectUtility Error: Cannot run tool due to failed model imports. ***\n"
             })
             self.output_queue.put({
                 "type": "finished",
                 "run_id": self.run_id,
                 "exit_code": -99, # Special code for setup failure
             })
             return

        try:
            # --- 1. Build Command ---
            command_list = self._build_command_list()

            # --- 2. Configure Startup Info (Platform Specific) ---
            startupinfo = None
            creationflags = 0 # Windows specific process creation flags

            # On Windows, hide the console window for non-GUI tools
            if sys.platform == "win32":
                if not self.tool_info.has_gui:
                    self.logger.debug("Configuring subprocess: Hide console window (non-GUI tool on Windows).")
                    startupinfo = subprocess.STARTUPINFO()
                    startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
                    startupinfo.wShowWindow = subprocess.SW_HIDE
                    # Alternative/Additional flag: CREATE_NO_WINDOW
                    # creationflags = subprocess.CREATE_NO_WINDOW
                else:
                     # For GUI tools on Windows, allow the window to show
                     self.logger.debug("Configuring subprocess: Allow console/GUI window (GUI tool on Windows).")
                     # No special flags needed to allow window display


            # --- 3. Start Subprocess ---
            effective_working_dir = self.tool_info.working_dir
            self.logger.info(f"Executing command: {' '.join(command_list)}")
            self.logger.info(f"Working Directory: {effective_working_dir}")

            # Ensure working directory exists (Popen might fail otherwise)
            if not os.path.isdir(effective_working_dir):
                 self.logger.error(f"Working directory '{effective_working_dir}' does not exist. Process cannot start.")
                 raise FileNotFoundError(f"Working directory not found: {effective_working_dir}")

            self._process = subprocess.Popen(
                command_list,
                cwd=effective_working_dir, # Set the working directory
                stdout=subprocess.PIPE,      # Capture stdout
                stderr=subprocess.PIPE,      # Capture stderr
                text=True,                   # Decode streams as text (using default encoding or specified)
                encoding="utf-8",            # Be explicit about encoding (adjust if needed)
                errors="replace",            # Handle potential decoding errors gracefully
                bufsize=1,                   # Use line buffering for stdout/stderr
                startupinfo=startupinfo,     # Pass Windows specific startup info
                creationflags=creationflags, # Pass Windows specific creation flags
            )
            # Log process start immediately after Popen returns
            self.logger.info(f"Subprocess started successfully with PID: {self._process.pid}")
            # Send PID update to GUI? MainWindow handles this via after() currently.

            # --- 4. Start Stream Reader Threads ---
            if self._process.stdout:
                self._stdout_thread = threading.Thread(
                    target=self._stream_reader,
                    args=(self._process.stdout, "stdout"),
                    daemon=True, # Daemon threads exit automatically if main program exits
                    name=f"{self.run_id}_stdout_reader" # Assign name for easier debugging
                )
                self._stdout_thread.start()
                self.logger.debug("Stdout reader thread started.")
            else:
                # This should not happen with stdout=subprocess.PIPE, but log if it does
                self.logger.warning("Process stdout stream is unexpectedly None.")

            if self._process.stderr:
                self._stderr_thread = threading.Thread(
                    target=self._stream_reader,
                    args=(self._process.stderr, "stderr"),
                    daemon=True,
                    name=f"{self.run_id}_stderr_reader"
                )
                self._stderr_thread.start()
                self.logger.debug("Stderr reader thread started.")
            else:
                 self.logger.warning("Process stderr stream is unexpectedly None.")

            # --- 5. Wait for Process Completion ---
            # wait() blocks this worker thread (not the GUI thread) until the process finishes.
            # It returns the process exit code.
            self.logger.debug(f"Worker thread waiting for subprocess (PID: {self._process.pid}) to complete...")
            final_exit_code = self._process.wait()
            self.logger.info(
                f"Subprocess (PID: {self._process.pid}) finished with exit code: {final_exit_code}"
            )

            # --- 6. Wait for Reader Threads to Finish ---
            # After the process finishes and streams are closed, the reader threads
            # should exit their loops naturally. We join them to ensure they complete
            # processing any remaining buffered output before we send the 'finished' message.
            join_timeout = 2.0 # Seconds to wait for reader threads
            if self._stdout_thread and self._stdout_thread.is_alive():
                self.logger.debug(f"Waiting for stdout reader thread to join (timeout={join_timeout}s)...")
                self._stdout_thread.join(timeout=join_timeout)
                if self._stdout_thread.is_alive():
                    self.logger.warning("Stdout reader thread did not join within the timeout.")
            if self._stderr_thread and self._stderr_thread.is_alive():
                 self.logger.debug(f"Waiting for stderr reader thread to join (timeout={join_timeout}s)...")
                 self._stderr_thread.join(timeout=join_timeout)
                 if self._stderr_thread.is_alive():
                    self.logger.warning("Stderr reader thread did not join within the timeout.")

        except FileNotFoundError as e:
            # Error if the command executable was not found
            self.logger.error(f"Command execution failed: The command '{command_list[0]}' was not found. {e}", exc_info=True)
            # Send specific error message to GUI
            error_msg = (
                f"ERROR: Command not found: '{command_list[0]}'.\n"
                f"Please ensure the program is installed and in the system's PATH, "
                f"or the path in the configuration is correct.\nDetails: {e}\n"
            )
            self.output_queue.put({"type": "stderr", "run_id": self.run_id, "data": error_msg})
            final_exit_code = -10 # Use a distinct error code for file not found

        except PermissionError as e:
            # Error if there's no permission to execute the command
            self.logger.error(f"Command execution failed: Permission denied for '{command_list[0]}'. {e}", exc_info=True)
            error_msg = f"ERROR: Permission denied to execute command: '{command_list[0]}'.\nDetails: {e}\n"
            self.output_queue.put({"type": "stderr", "run_id": self.run_id, "data": error_msg})
            final_exit_code = -20 # Distinct error code for permission denied

        except Exception as e:
            # Catch any other unexpected errors during setup or execution
            self.logger.exception("An unexpected error occurred during the process execution workflow.")
            error_msg = f"*** ProjectUtility Worker Error: An unexpected error occurred ***\n{type(e).__name__}: {e}\n"
            self.output_queue.put({"type": "stderr", "run_id": self.run_id, "data": error_msg})
            # If the process started but an error occurred in the worker itself, try to kill the process
            if self._process and self._process.poll() is None:
                self.logger.warning("Terminating subprocess due to unexpected worker error.")
                self.terminate() # Attempt graceful termination first
                # Give it a moment, then force kill if necessary?
                try:
                     self._process.wait(timeout=1.0)
                except subprocess.TimeoutExpired:
                     self.logger.warning("Process did not terminate gracefully after worker error, attempting kill.")
                     self._process.kill()
            # Use a distinct error code for unexpected worker errors
            final_exit_code = -30

        finally:
            # --- 7. Send Final Status Message ---
            # This block ensures the 'finished' message is sent reliably,
            # regardless of whether the process ran successfully or an error occurred.
            if final_exit_code is None:
                # This case might happen if a critical error occurred *before*
                # process.wait() could be called successfully (e.g., Popen failed silently).
                # Or if the finally block is reached via an unexpected path.
                final_exit_code = -40 # Indicate failure before process completion was confirmed
                self.logger.error("Worker finished, but final exit code was not determined. Reporting error code.")

            self.output_queue.put(
                {
                    "type": "finished",
                    "run_id": self.run_id,
                    "exit_code": final_exit_code,
                }
            )
            self.logger.info(
                f"Worker thread completed for run_id: {self.run_id}. "
                f"Final exit code reported to queue: {final_exit_code}"
            )
            # Clean up process reference after it has finished
            self._process = None


    def terminate(self) -> None:
        """
        Requests termination of the running subprocess managed by this worker.

        It first sends a SIGTERM signal (or TerminateProcess on Windows).
        Does not guarantee immediate termination. The 'finished' message will
        be sent by the run() method when the process actually exits.
        """
        # Check if the process object exists and if it's still running
        # poll() returns None if running, or the exit code if terminated.
        if self._process and self._process.poll() is None:
            pid = self._process.pid
            self.logger.warning(
                f"Attempting to terminate subprocess (PID: {pid}) for run '{self.run_id}'..."
            )
            # Optional: Signal stream readers to stop trying to read
            # self._stop_event.set()

            try:
                # Send SIGTERM (Unix) or TerminateProcess (Windows)
                self._process.terminate()
                self.logger.info(f"Sent terminate signal to process {pid}.")
                # Optionally, wait a short period and then force kill if needed:
                # try:
                #     self._process.wait(timeout=1.0) # Wait 1 second
                #     self.logger.info(f"Process {pid} terminated gracefully.")
                # except subprocess.TimeoutExpired:
                #     self.logger.warning(f"Process {pid} did not terminate after SIGTERM, sending SIGKILL...")
                #     self._process.kill() # Force kill
                #     self.logger.info(f"Sent kill signal to process {pid}.")
            except OSError as e:
                 # Errors can occur if the process terminated between poll() and terminate()
                 self.logger.error(f"OS error while trying to terminate process {pid}: {e}")
            except Exception as e:
                self.logger.exception(f"Unexpected error while trying to terminate process {pid}")
        else:
            status = "already finished" if self._process and self._process.poll() is not None else "does not exist or not running"
            self.logger.info(
                f"Cannot terminate process for run '{self.run_id}': Process {status}."
            )