add multisegment analysis, fix truncated filenames in g_reconverter report

This commit is contained in:
VALLONGOL 2025-09-17 08:30:30 +02:00
parent d12cca39aa
commit 73e5bb8aa2
3 changed files with 168 additions and 78 deletions

View File

@@ -3,7 +3,7 @@
     "last_opened_rec_file": "C:/src/____GitProjects/radar_data_reader/_rec/_25-05-15-12-22-52_sata_345.rec",
     "last_out_output_dir": "C:\\src\\____GitProjects\\radar_data_reader\\out_analisys",
     "last_rec_output_dir": "C:\\src\\____GitProjects\\radar_data_reader\\_rec",
-    "last_flight_folder": "C:/__Voli/Volo_12_25maggio2025/rec/rec",
+    "last_flight_folder": "C:/__Voli/_t_bay_scan",
    "last_flight_workspace_parent_dir": "C:/src/____GitProjects/radar_data_reader/flight_workspace/",
    "active_out_export_profile_name": "trackingdata",
    "export_profiles": [

View File

@@ -83,11 +83,19 @@ class ExportManager:
         if not exe_path or not Path(exe_path).is_file():
             raise ValueError(f"g_reconverter executable not found at: {exe_path}")

-        # Now job.start_file and job.end_file should be correct
-        first_rec_path = job.rec_folder / job.start_file
-
-        start_num_match = re.search(r"_(\d+)\.rec$", job.start_file)
-        end_num_match = re.search(r"_(\d+)\.rec$", job.end_file)
+        # --- NEW ROBUST LOGIC ---
+        # The filename from the summary can be truncated in various ways (.re, or no extension).
+        # First, reconstruct the full, correct filename for finding the file on disk.
+        start_file_base = job.start_file.split('.')[0]
+        start_filename_full = start_file_base + '.rec'
+        first_rec_path = job.rec_folder / start_filename_full
+
+        # Second, use a flexible regex to extract the sequence number from the (possibly truncated) name.
+        # This regex handles .rec, .re, or no extension.
+        start_num_match = re.search(r"_(\d+)(\.re(c?)?)?$", job.start_file)
+        end_num_match = re.search(r"_(\d+)(\.re(c?)?)?$", job.end_file)
+        # --- END NEW LOGIC ---

         if not start_num_match or not end_num_match:
             log.error(
                 f"Could not parse sequence number from filenames: '{job.start_file}', '{job.end_file}'"

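Note on the flexible regex above: it accepts all three truncation variants the diff comments describe (.rec, .re, or a bare sequence number). A minimal standalone check, assuming Python 3; the sample filenames are hypothetical, modeled on the _rec paths in this repo:

import re

# Same pattern as in the diff above: underscore, digits, then an optional
# extension that may be ".rec", the truncated ".re", or missing entirely.
PATTERN = re.compile(r"_(\d+)(\.re(c?)?)?$")

for name in ("_25-05-15-12-22-52_sata_345.rec",   # full extension
             "_25-05-15-12-22-52_sata_345.re",    # truncated extension
             "_25-05-15-12-22-52_sata_345"):      # no extension
    match = PATTERN.search(name)
    assert match is not None
    print(match.group(1))  # -> "345" in all three cases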
View File

@@ -13,6 +13,7 @@ import re
 from pathlib import Path
 from typing import List, Optional
 from datetime import timedelta
+from collections import defaultdict

 try:
     import pandas as pd
@@ -27,6 +28,7 @@ log = logger.get_logger(__name__)
 TICK_DURATION_S = 64e-6

 class FlightAnalyzer:
     """Manages the multi-step process of analyzing a flight folder."""
@@ -39,7 +41,11 @@ class FlightAnalyzer:
         self.analysis_options: dict = {}

     def start_analysis(
-        self, rec_folder_str: str, flight_name: str, workspace_path: Path, analysis_options: dict
+        self,
+        rec_folder_str: str,
+        flight_name: str,
+        workspace_path: Path,
+        analysis_options: dict,
     ) -> threading.Thread:
         self.current_flight_name = flight_name
         self.analysis_options = analysis_options
@@ -52,6 +58,11 @@ class FlightAnalyzer:
         return analysis_thread

     def _flight_analysis_orchestrator(self, rec_folder_str: str, flight_name: str, flight_dir: Path):
+        """
+        Orchestrates the entire flight analysis process.
+        This method now groups .rec files by session, runs the C++ analyzer for each group,
+        and then combines the results for final processing.
+        """
         self.current_flight_folder_path = None
         try:
             flight_dir.mkdir(parents=True, exist_ok=True)
@@ -60,16 +71,28 @@ class FlightAnalyzer:
             cpp_config = self.config_manager.get_cpp_converter_config()
             exe_path = cpp_config.get("cpp_executable_path")
             if not exe_path or not Path(exe_path).is_file():
-                raise ValueError(
-                    f"C++ executable not found at path: {exe_path}"
-                )
+                raise ValueError(f"C++ executable not found at path: {exe_path}")

             rec_files = sorted(Path(rec_folder_str).glob("*.rec"))
             if not rec_files:
                 raise FileNotFoundError("No .rec files found in the specified folder.")

-            first_rec_path = rec_files[0]
-            num_files_to_process = len(rec_files)
+            # Group files by a common base name to handle multiple recording sessions.
+            # The base name is determined by splitting the filename before the last underscore,
+            # which typically separates the descriptive part from the sequence number.
+            file_groups = defaultdict(list)
+            for f in rec_files:
+                base_name = f.name.rsplit('_', 1)[0]
+                file_groups[base_name].append(f)
+            log.info(f"Found {len(rec_files)} .rec files, grouped into {len(file_groups)} recording session(s).")
+
+            all_storyboard_dfs = []
+            for group_name, group_files in file_groups.items():
+                log.info(f"Processing group '{group_name}' with {len(group_files)} file(s)...")
+                first_rec_path = group_files[0]
+                num_files_to_process = len(group_files)

                 command_list = [
                     str(exe_path),
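A quick illustration of the grouping key used above: everything before the last underscore in the filename. A small sketch under hypothetical session names:

from collections import defaultdict
from pathlib import Path

# Two interleaved recording sessions in one folder (names are made up).
names = ["_25-05-15-12-22-52_sata_345.rec", "_25-05-15-12-22-52_sata_346.rec",
         "bay_scan_001.rec", "bay_scan_002.rec"]

file_groups = defaultdict(list)
for f in map(Path, names):
    # rsplit('_', 1) splits once from the right, isolating the sequence suffix.
    file_groups[f.name.rsplit('_', 1)[0]].append(f)

print(sorted(file_groups))  # ['_25-05-15-12-22-52_sata', 'bay_scan']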
@@ -81,8 +104,10 @@ class FlightAnalyzer:
                     '/vshow',
                 ]

-            log.info(f"Running g_reconverter for full analysis: {' '.join(command_list)}")
+                log.info(f"Running g_reconverter for group '{group_name}': {' '.join(command_list)}")
+                # Run the C++ process for the current group.
+                # This is a blocking call to ensure sequential processing of groups.
                 self.worker_process = mp.Process(
                     target=run_cpp_converter,
                     args=(command_list, self.result_queue, str(flight_dir), True),
@@ -90,55 +115,71 @@ class FlightAnalyzer:
                 )
                 self.worker_process.start()
                 self.worker_process.join()
+                log.info(f"C++ analysis finished for group '{group_name}'.")

-            log.info("g_reconverter full analysis process finished.")
-            self.result_queue.put({"type": "cpp_complete"})
-        except Exception as e:
-            log.error(f"Flight analysis orchestrator failed: {e}", exc_info=True)
-            self.result_queue.put({"type": "error", "message": str(e)})
-
-    def handle_final_analysis_steps(self):
-        if not self.current_flight_folder_path:
-            log.error("Cannot run final analysis steps: flight folder path is not set.")
-            self.result_queue.put({"type": "error", "message": "Internal state error: flight folder path missing."})
-            return
-        try:
-            log.info("C++ part complete. Starting Python-side analysis...")
-            all_txt_files = list(self.current_flight_folder_path.glob("pp-*.txt"))
-            summary_files = [f for f in all_txt_files if "aesa" not in f.name.lower()]
-            if not summary_files:
-                raise FileNotFoundError("Main summary file not found after analysis.")
-            summary_txt_path = summary_files[0]
-            log.info(f"Found main summary file: {summary_txt_path.name}")
-            storyboard_df = self._parse_and_save_storyboard(
-                summary_txt_path, self.current_flight_folder_path
-            )
-            if storyboard_df is None or storyboard_df.empty:
-                raise ValueError("Parsing storyboard failed or resulted in empty data.")
-            summary_df = self._create_and_save_summary(
-                storyboard_df, self.current_flight_folder_path, self.analysis_options
-            )
-            self._create_flight_report_txt(summary_df, self.current_flight_folder_path)
+                # After the C++ process completes, parse its output.
+                # The output file is expected to be in the flight_dir.
+                summary_files = [f for f in flight_dir.glob("pp-*.txt") if "aesa" not in f.name.lower()]
+                if not summary_files:
+                    log.warning(f"No summary file found for group '{group_name}'. Skipping.")
+                    continue
+                summary_txt_path = summary_files[0]
+                log.info(f"Parsing summary file for group: {summary_txt_path.name}")
+                storyboard_df_group = self._parse_storyboard_from_txt(summary_txt_path)
+                if storyboard_df_group is not None and not storyboard_df_group.empty:
+                    all_storyboard_dfs.append(storyboard_df_group)
+
+                # Clean up the output file to prevent it from being used by the next iteration.
+                try:
+                    summary_txt_path.unlink()
+                except OSError as e:
+                    log.warning(f"Could not delete temporary summary file {summary_txt_path}: {e}")
+
+            if not all_storyboard_dfs:
+                raise ValueError("Analysis failed: No storyboard data could be parsed from any file group.")
+
+            # Combine all dataframes from all groups into one.
+            log.info(f"Combining {len(all_storyboard_dfs)} storyboard segment(s) into a single flight storyboard.")
+            storyboard_df = pd.concat(all_storyboard_dfs, ignore_index=True)
+            # Sort the combined dataframe by batch number to ensure correct chronological order.
+            storyboard_df.sort_values(by="Batch", inplace=True)
+            storyboard_df.reset_index(drop=True, inplace=True)
+
+            # --- Start of final analysis steps (previously in handle_final_analysis_steps) ---
+            log.info("C++ processing complete for all groups. Starting final Python-side analysis...")
+
+            # Save the final, combined storyboard.
+            self._save_storyboard_artifacts(storyboard_df, flight_dir)
+
+            summary_df = self._create_and_save_summary(storyboard_df, flight_dir, self.analysis_options)
+            self._create_flight_report_txt(summary_df, flight_dir)

             self.result_queue.put({
                 "type": "analysis_summary_data",
                 "data": summary_df,
-                "flight_folder_path": self.current_flight_folder_path
+                "flight_folder_path": flight_dir
             })

             log.info("Flight analysis complete. All artifacts saved.")
             self.result_queue.put({"type": "complete", "message": "Analysis successful."})
         except Exception as e:
-            log.error(f"Final analysis steps failed: {e}", exc_info=True)
+            log.error(f"Flight analysis orchestrator failed: {e}", exc_info=True)
             self.result_queue.put({"type": "error", "message": str(e)})

+    def handle_final_analysis_steps(self):
+        """
+        This method is now a placeholder.
+        The logic has been moved into _flight_analysis_orchestrator to ensure
+        sequential execution after all C++ processes are complete.
+        """
+        log.info("Final analysis steps are now integrated into the main orchestrator.")
+        pass
+
     def _make_columns_unique(self, columns: List[str]) -> List[str]:
         final_cols, counts = [], {}
         for col in columns:
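On the combination step in this hunk: the per-group storyboards are concatenated and re-sorted so the merged result is in batch order regardless of which group was processed first. A compact sketch with toy Batch/TTAG values:

import pandas as pd

# Two hypothetical per-group storyboards, deliberately out of batch order.
g1 = pd.DataFrame({"Batch": [10, 11], "TTAG": [1_000_000, 1_000_500]})
g2 = pd.DataFrame({"Batch": [3, 4], "TTAG": [100, 600]})

storyboard_df = pd.concat([g1, g2], ignore_index=True)  # group order is arbitrary
storyboard_df.sort_values(by="Batch", inplace=True)     # restore chronology
storyboard_df.reset_index(drop=True, inplace=True)
print(storyboard_df["Batch"].tolist())  # [3, 4, 10, 11]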
@@ -150,9 +191,8 @@ class FlightAnalyzer:
             final_cols.append(col)
         return final_cols

-    def _parse_and_save_storyboard(
-        self, txt_path: Path, output_dir: Path
-    ) -> Optional["pd.DataFrame"]:
+    def _parse_storyboard_from_txt(self, txt_path: Path) -> Optional["pd.DataFrame"]:
+        """Parses a storyboard TXT file into a pandas DataFrame."""
         if pd is None:
             log.error("Pandas library is not installed, cannot parse storyboard.")
             return None
@@ -175,6 +215,14 @@ class FlightAnalyzer:
             for col in storyboard_df.select_dtypes(include=['object']).columns:
                 storyboard_df[col] = storyboard_df[col].str.strip()

+            # --- MODIFICATION START ---
+            # Correct truncated .re filenames from the C++ tool's output to .rec
+            if 'file' in storyboard_df.columns:
+                storyboard_df['file'] = storyboard_df['file'].apply(
+                    lambda x: x + 'c' if isinstance(x, str) and x.endswith('.re') else x
+                )
+            # --- MODIFICATION END ---
+
             numeric_cols = ["Batch", "TTAG"]
             for col in numeric_cols:
                 if col in storyboard_df.columns:
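The lambda above repairs only string values ending in the truncated '.re'; full names and non-strings (e.g. None/NaN) pass through unchanged. A tiny demonstration with hypothetical filenames:

import pandas as pd

df = pd.DataFrame({"file": ["flight_001.rec", "flight_002.re", None]})
df["file"] = df["file"].apply(
    # Append the missing 'c' only when the value is a string cut off at '.re'.
    lambda x: x + "c" if isinstance(x, str) and x.endswith(".re") else x
)
print(df["file"].tolist())  # ['flight_001.rec', 'flight_002.rec', None]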
@@ -184,27 +232,48 @@ class FlightAnalyzer:
             storyboard_df["Batch"] = storyboard_df["Batch"].astype(int)
             storyboard_df["TTAG"] = storyboard_df["TTAG"].astype(int)

-        except Exception as e:
-            log.error(f"Failed to read or process summary file {txt_path.name}: {e}")
-            return None
-
             if storyboard_df.empty:
                 log.warning(f"DataFrame is empty after cleaning {txt_path.name}")
                 return None

+            return storyboard_df
+        except Exception as e:
+            log.error(f"Failed to read or process summary file {txt_path.name}: {e}")
+            return None
+
+    def _save_storyboard_artifacts(self, storyboard_df: "pd.DataFrame", output_dir: Path):
+        """Saves the final storyboard DataFrame to CSV and JSON."""
         csv_path = output_dir / "flight_storyboard.csv"
         json_path = output_dir / "flight_storyboard.json"
-        log.info(f"Saving full storyboard to {csv_path}")
+        log.info(f"Saving final combined storyboard to {csv_path}")
         storyboard_df.to_csv(csv_path, index=False)
-        log.info(f"Saving full storyboard to {json_path}")
+        log.info(f"Saving final combined storyboard to {json_path}")
         storyboard_df.to_json(json_path, orient="records", indent=4)

+    def _parse_and_save_storyboard(
+        self,
+        txt_path: Path,
+        output_dir: Path,
+    ) -> Optional["pd.DataFrame"]:
+        """
+        Parses a storyboard file and saves it to CSV and JSON.
+        This method now uses helper functions for parsing and saving.
+        """
+        storyboard_df = self._parse_storyboard_from_txt(txt_path)
+        if storyboard_df is None or storyboard_df.empty:
+            return None
+        self._save_storyboard_artifacts(storyboard_df, output_dir)
         return storyboard_df

     def _create_and_save_summary(
-        self, storyboard_df: "pd.DataFrame", output_dir: Path, options: dict
+        self,
+        storyboard_df: "pd.DataFrame",
+        output_dir: Path,
+        options: dict,
     ) -> "pd.DataFrame":
         df = storyboard_df.copy()
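For reference, orient="records" with indent=4 serializes the storyboard as a pretty-printed list of row objects. A two-row stand-in, with column names taken from this diff:

import pandas as pd

df = pd.DataFrame({"Batch": [3, 4], "TTAG": [100, 600]})
# With no path argument, to_json returns the JSON document as a string.
print(df.to_json(orient="records", indent=4))
# Roughly:
# [
#     {
#         "Batch":3,
#         "TTAG":100
#     },
#     ...
# ]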
@@ -234,8 +303,21 @@ class FlightAnalyzer:
         for component in status_components[1:]:
             df['status'] = df['status'] + "_" + component

+        # --- MODIFICATION START ---
+        # Detect segment changes based on status AND large TTAG jumps.
         df['status_changed'] = df['status'].ne(df['status'].shift())
+
+        # A TTAG jump is considered a change of segment.
+        # A jump of 1,000,000 ticks corresponds to ~64 seconds. This indicates a likely data anomaly
+        # or a significant time gap between recordings that should be treated as a new segment.
+        TICK_JUMP_THRESHOLD = 1_000_000
+        ttag_diff = df['TTAG'].diff()
+        df['ttag_jump'] = ttag_diff.abs() > TICK_JUMP_THRESHOLD
+
+        # A new segment starts if the status changes OR if there's a TTAG jump.
+        df['status_changed'] = df['status_changed'] | df['ttag_jump']
+        # --- MODIFICATION END ---

         min_ttag = df['TTAG'].min()
         df['flight_time_s'] = (df['TTAG'] - min_ttag) * TICK_DURATION_S
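For scale: at TICK_DURATION_S = 64e-6 s/tick, the 1,000,000-tick threshold equals 1,000,000 x 64e-6 = 64 s. A sketch of the jump detection; the cumsum-based segment id at the end is an assumption added for illustration, not part of this commit:

import pandas as pd

TICK_DURATION_S = 64e-6
TICK_JUMP_THRESHOLD = 1_000_000
print(TICK_JUMP_THRESHOLD * TICK_DURATION_S)  # 64.0 (seconds)

# Hypothetical TTAG series with one large gap between recordings.
df = pd.DataFrame({"TTAG": [0, 500, 1_000, 5_000_000, 5_000_500]})
df["ttag_jump"] = df["TTAG"].diff().abs() > TICK_JUMP_THRESHOLD
# Hypothetical follow-up: turn boolean change flags into integer segment ids.
df["segment_id"] = df["ttag_jump"].cumsum()
print(df["segment_id"].tolist())  # [0, 0, 0, 1, 1]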
@@ -294,12 +376,12 @@ class FlightAnalyzer:
             f.write(f" FLIGHT ANALYSIS REPORT - {self.current_flight_name} \n")
             f.write("=" * 80 + "\n\n")

             f.write("--- FLIGHT OVERVIEW ---\n")
             f.write(f"Total Duration: {total_duration:.2f} seconds\n")
             f.write(f"Total Batches: {total_batches}\n")
             f.write(f"Total Segments: {num_segments}\n\n")

             f.write("--- SEGMENT SUMMARY ---\n")
             report_df = summary_df.copy()
             report_df['Duration (s)'] = report_df['Duration (s)'].map('{:.2f}'.format)
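The final formatting step converts the Duration column to fixed-precision strings so the text report renders cleanly. A minimal sketch; every column other than 'Duration (s)' is hypothetical:

import pandas as pd

summary_df = pd.DataFrame({"Segment": [1, 2], "Duration (s)": [12.5, 3.254]})
report_df = summary_df.copy()
# Format each duration as a fixed two-decimal string before rendering.
report_df['Duration (s)'] = report_df['Duration (s)'].map('{:.2f}'.format)
print(report_df.to_string(index=False))
# Roughly:
#  Segment Duration (s)
#        1        12.50
#        2         3.25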