add multisegment analysis, fix truncated filenames in g_reconverter report

VALLONGOL 2025-09-17 08:30:30 +02:00
parent d12cca39aa
commit 73e5bb8aa2
3 changed files with 168 additions and 78 deletions

View File

@@ -3,7 +3,7 @@
"last_opened_rec_file": "C:/src/____GitProjects/radar_data_reader/_rec/_25-05-15-12-22-52_sata_345.rec",
"last_out_output_dir": "C:\\src\\____GitProjects\\radar_data_reader\\out_analisys",
"last_rec_output_dir": "C:\\src\\____GitProjects\\radar_data_reader\\_rec",
"last_flight_folder": "C:/__Voli/Volo_12_25maggio2025/rec/rec",
"last_flight_folder": "C:/__Voli/_t_bay_scan",
"last_flight_workspace_parent_dir": "C:/src/____GitProjects/radar_data_reader/flight_workspace/",
"active_out_export_profile_name": "trackingdata",
"export_profiles": [

View File

@@ -83,11 +83,19 @@ class ExportManager:
if not exe_path or not Path(exe_path).is_file():
raise ValueError(f"g_reconverter executable not found at: {exe_path}")
# At this point, job.start_file and job.end_file should be correct
first_rec_path = job.rec_folder / job.start_file
# --- NEW ROBUST LOGIC ---
# The filename from the summary can be truncated in various ways (.re, or no extension).
# First, reconstruct the full, correct filename for finding the file on disk.
start_file_base = job.start_file.split('.')[0]
start_filename_full = start_file_base + '.rec'
first_rec_path = job.rec_folder / start_filename_full
# Second, use a flexible regex to extract the sequence number from the (possibly truncated) name.
# This regex handles .rec, .re, or no extension.
start_num_match = re.search(r"_(\d+)(\.re(c?)?)?$", job.start_file)
end_num_match = re.search(r"_(\d+)(\.re(c?)?)?$", job.end_file)
# --- END NEW LOGIC ---
start_num_match = re.search(r"_(\d+)\.rec$", job.start_file)
end_num_match = re.search(r"_(\d+)\.rec$", job.end_file)
if not start_num_match or not end_num_match:
log.error(
f"Could not parse sequence number from filenames: '{job.start_file}', '{job.end_file}'"

View File

@@ -13,6 +13,7 @@ import re
from pathlib import Path
from typing import List, Optional
from datetime import timedelta
from collections import defaultdict
try:
import pandas as pd
@@ -27,6 +28,7 @@ log = logger.get_logger(__name__)
TICK_DURATION_S = 64e-6
class FlightAnalyzer:
"""Manages the multi-step process of analyzing a flight folder."""
@@ -39,7 +41,11 @@ class FlightAnalyzer:
self.analysis_options: dict = {}
def start_analysis(
self, rec_folder_str: str, flight_name: str, workspace_path: Path, analysis_options: dict
self,
rec_folder_str: str,
flight_name: str,
workspace_path: Path,
analysis_options: dict,
) -> threading.Thread:
self.current_flight_name = flight_name
self.analysis_options = analysis_options
@@ -52,6 +58,11 @@
return analysis_thread
def _flight_analysis_orchestrator(self, rec_folder_str: str, flight_name: str, flight_dir: Path):
"""
Orchestrates the entire flight analysis process.
This method now groups .rec files by session, runs the C++ analyzer for each group,
and then combines the results for final processing.
"""
self.current_flight_folder_path = None
try:
flight_dir.mkdir(parents=True, exist_ok=True)
@@ -60,85 +71,115 @@
cpp_config = self.config_manager.get_cpp_converter_config()
exe_path = cpp_config.get("cpp_executable_path")
if not exe_path or not Path(exe_path).is_file():
raise ValueError(
f"C++ executable not found at path: {exe_path}"
)
raise ValueError(f"C++ executable not found at path: {exe_path}")
rec_files = sorted(Path(rec_folder_str).glob("*.rec"))
if not rec_files:
raise FileNotFoundError("No .rec files found in the specified folder.")
first_rec_path = rec_files[0]
num_files_to_process = len(rec_files)
# Group files by a common base name to handle multiple recording sessions.
# The base name is determined by splitting the filename before the last underscore,
# which typically separates the descriptive part from the sequence number.
file_groups = defaultdict(list)
for f in rec_files:
base_name = f.name.rsplit('_', 1)[0]
file_groups[base_name].append(f)
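As a quick illustration of the grouping step, a self-contained sketch with invented filenames following the '<session>_<sequence>.rec' pattern the rsplit call assumes:

from collections import defaultdict
from pathlib import Path

rec_files = [Path(n) for n in
             ("bay_scan_1.rec", "bay_scan_2.rec", "sata_1.rec", "sata_2.rec")]
file_groups = defaultdict(list)
for f in rec_files:
    # Everything before the last underscore identifies the recording session.
    file_groups[f.name.rsplit('_', 1)[0]].append(f)

print({k: [p.name for p in v] for k, v in file_groups.items()})
# {'bay_scan': ['bay_scan_1.rec', 'bay_scan_2.rec'], 'sata': ['sata_1.rec', 'sata_2.rec']}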
command_list = [
str(exe_path),
str(first_rec_path),
f"/n={num_files_to_process}",
"/p=1",
"/a",
'/vsave',
'/vshow',
]
log.info(f"Found {len(rec_files)} .rec files, grouped into {len(file_groups)} recording session(s).")
log.info(f"Running g_reconverter for full analysis: {' '.join(command_list)}")
all_storyboard_dfs = []
for group_name, group_files in file_groups.items():
log.info(f"Processing group '{group_name}' with {len(group_files)} file(s)...")
first_rec_path = group_files[0]
num_files_to_process = len(group_files)
self.worker_process = mp.Process(
target=run_cpp_converter,
args=(command_list, self.result_queue, str(flight_dir), True),
daemon=True,
)
self.worker_process.start()
self.worker_process.join()
command_list = [
str(exe_path),
str(first_rec_path),
f"/n={num_files_to_process}",
"/p=1",
"/a",
'/vsave',
'/vshow',
]
log.info("g_reconverter full analysis process finished.")
self.result_queue.put({"type": "cpp_complete"})
log.info(f"Running g_reconverter for group '{group_name}': {' '.join(command_list)}")
except Exception as e:
log.error(f"Flight analysis orchestrator failed: {e}", exc_info=True)
self.result_queue.put({"type": "error", "message": str(e)})
# Run the C++ process for the current group.
# This is a blocking call to ensure sequential processing of groups.
self.worker_process = mp.Process(
target=run_cpp_converter,
args=(command_list, self.result_queue, str(flight_dir), True),
daemon=True,
)
self.worker_process.start()
self.worker_process.join()
log.info(f"C++ analysis finished for group '{group_name}'.")
def handle_final_analysis_steps(self):
if not self.current_flight_folder_path:
log.error("Cannot run final analysis steps: flight folder path is not set.")
self.result_queue.put({"type": "error", "message": "Internal state error: flight folder path missing."})
return
# After the C++ process completes, parse its output.
# The output file is expected to be in the flight_dir.
summary_files = [f for f in flight_dir.glob("pp-*.txt") if "aesa" not in f.name.lower()]
if not summary_files:
log.warning(f"No summary file found for group '{group_name}'. Skipping.")
continue
try:
log.info("C++ part complete. Starting Python-side analysis...")
all_txt_files = list(self.current_flight_folder_path.glob("pp-*.txt"))
summary_files = [f for f in all_txt_files if "aesa" not in f.name.lower()]
if not summary_files:
raise FileNotFoundError("Main summary file not found after analysis.")
summary_txt_path = summary_files[0]
log.info(f"Parsing summary file for group: {summary_txt_path.name}")
storyboard_df_group = self._parse_storyboard_from_txt(summary_txt_path)
if storyboard_df_group is not None and not storyboard_df_group.empty:
all_storyboard_dfs.append(storyboard_df_group)
# Clean up the output file to prevent it from being used by the next iteration.
try:
summary_txt_path.unlink()
except OSError as e:
log.warning(f"Could not delete temporary summary file {summary_txt_path}: {e}")
summary_txt_path = summary_files[0]
log.info(f"Found main summary file: {summary_txt_path.name}")
storyboard_df = self._parse_and_save_storyboard(
summary_txt_path, self.current_flight_folder_path
)
if storyboard_df is None or storyboard_df.empty:
raise ValueError("Parsing storyboard failed or resulted in empty data.")
if not all_storyboard_dfs:
raise ValueError("Analysis failed: No storyboard data could be parsed from any file group.")
summary_df = self._create_and_save_summary(
storyboard_df, self.current_flight_folder_path, self.analysis_options
)
# Combine all dataframes from all groups into one.
log.info(f"Combining {len(all_storyboard_dfs)} storyboard segment(s) into a single flight storyboard.")
storyboard_df = pd.concat(all_storyboard_dfs, ignore_index=True)
self._create_flight_report_txt(summary_df, self.current_flight_folder_path)
# Sort the combined dataframe by batch number to ensure correct chronological order.
storyboard_df.sort_values(by="Batch", inplace=True)
storyboard_df.reset_index(drop=True, inplace=True)
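A small sketch of the combine-and-sort step with invented Batch/TTAG values, showing how out-of-order segments end up chronological:

import pandas as pd

seg_late = pd.DataFrame({"Batch": [3, 4], "TTAG": [3_000, 4_000]})
seg_early = pd.DataFrame({"Batch": [1, 2], "TTAG": [1_000, 2_000]})

combined = pd.concat([seg_late, seg_early], ignore_index=True)
combined.sort_values(by="Batch", inplace=True)
combined.reset_index(drop=True, inplace=True)
print(combined["Batch"].tolist())  # [1, 2, 3, 4]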
# --- Start of final analysis steps (previously in handle_final_analysis_steps) ---
log.info("C++ processing complete for all groups. Starting final Python-side analysis...")
# Save the final, combined storyboard.
self._save_storyboard_artifacts(storyboard_df, flight_dir)
summary_df = self._create_and_save_summary(storyboard_df, flight_dir, self.analysis_options)
self._create_flight_report_txt(summary_df, flight_dir)
self.result_queue.put({
"type": "analysis_summary_data",
"type": "analysis_summary_data",
"data": summary_df,
"flight_folder_path": self.current_flight_folder_path
"flight_folder_path": flight_dir
})
log.info("Flight analysis complete. All artifacts saved.")
self.result_queue.put({"type": "complete", "message": "Analysis successful."})
except Exception as e:
log.error(f"Final analysis steps failed: {e}", exc_info=True)
log.error(f"Flight analysis orchestrator failed: {e}", exc_info=True)
self.result_queue.put({"type": "error", "message": str(e)})
def handle_final_analysis_steps(self):
"""
This method is now a placeholder.
The logic has been moved into _flight_analysis_orchestrator to ensure
sequential execution after all C++ processes are complete.
"""
log.info("Final analysis steps are now integrated into the main orchestrator.")
pass
def _make_columns_unique(self, columns: List[str]) -> List[str]:
final_cols, counts = [], {}
for col in columns:
@@ -150,9 +191,8 @@ class FlightAnalyzer:
final_cols.append(col)
return final_cols
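The middle of _make_columns_unique falls outside this hunk. One plausible completion, consistent with the visible final_cols/counts lines, suffixes repeated headers with a running counter (an assumption, not the committed code):

from typing import List

def make_columns_unique(columns: List[str]) -> List[str]:
    # Assumed body: suffix repeated names so pandas receives unique labels.
    final_cols, counts = [], {}
    for col in columns:
        if col in counts:
            counts[col] += 1
            col = f"{col}_{counts[col]}"
        else:
            counts[col] = 0
        final_cols.append(col)
    return final_cols

print(make_columns_unique(["TTAG", "file", "TTAG"]))  # ['TTAG', 'file', 'TTAG_1']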
def _parse_and_save_storyboard(
self, txt_path: Path, output_dir: Path
) -> Optional["pd.DataFrame"]:
def _parse_storyboard_from_txt(self, txt_path: Path) -> Optional["pd.DataFrame"]:
"""Parses a storyboard TXT file into a pandas DataFrame."""
if pd is None:
log.error("Pandas library is not installed, cannot parse storyboard.")
return None
@@ -163,10 +203,10 @@
unique_column_names = self._make_columns_unique(raw_columns)
storyboard_df = pd.read_csv(
txt_path,
sep=';',
header=0,
names=unique_column_names,
on_bad_lines='skip',
encoding='utf-8',
encoding_errors='ignore'
@@ -174,7 +214,15 @@
for col in storyboard_df.select_dtypes(include=['object']).columns:
storyboard_df[col] = storyboard_df[col].str.strip()
# --- MODIFICATION START ---
# Correct truncated .re filenames from the C++ tool's output to .rec
if 'file' in storyboard_df.columns:
storyboard_df['file'] = storyboard_df['file'].apply(
lambda x: x + 'c' if isinstance(x, str) and x.endswith('.re') else x
)
# --- MODIFICATION END ---
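A quick check of the truncation fix with invented values: only strings ending in '.re' gain the missing 'c', while complete names and non-strings pass through untouched.

import pandas as pd

files = pd.Series(["run_1.rec", "run_2.re", None])
fixed = files.apply(
    lambda x: x + 'c' if isinstance(x, str) and x.endswith('.re') else x
)
print(fixed.tolist())  # ['run_1.rec', 'run_2.rec', None]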
numeric_cols = ["Batch", "TTAG"]
for col in numeric_cols:
if col in storyboard_df.columns:
@@ -184,27 +232,48 @@
storyboard_df["Batch"] = storyboard_df["Batch"].astype(int)
storyboard_df["TTAG"] = storyboard_df["TTAG"].astype(int)
if storyboard_df.empty:
log.warning(f"DataFrame is empty after cleaning {txt_path.name}")
return None
return storyboard_df
except Exception as e:
log.error(f"Failed to read or process summary file {txt_path.name}: {e}")
return None
def _save_storyboard_artifacts(self, storyboard_df: "pd.DataFrame", output_dir: Path):
"""Saves the final storyboard DataFrame to CSV and JSON."""
csv_path = output_dir / "flight_storyboard.csv"
json_path = output_dir / "flight_storyboard.json"
log.info(f"Saving full storyboard to {csv_path}")
log.info(f"Saving final combined storyboard to {csv_path}")
storyboard_df.to_csv(csv_path, index=False)
log.info(f"Saving full storyboard to {json_path}")
log.info(f"Saving final combined storyboard to {json_path}")
storyboard_df.to_json(json_path, orient="records", indent=4)
def _parse_and_save_storyboard(
self,
txt_path: Path,
output_dir: Path,
) -> Optional["pd.DataFrame"]:
"""
Parses a storyboard file and saves it to CSV and JSON.
This method now uses helper functions for parsing and saving.
"""
storyboard_df = self._parse_storyboard_from_txt(txt_path)
if storyboard_df is None or storyboard_df.empty:
return None
self._save_storyboard_artifacts(storyboard_df, output_dir)
return storyboard_df
def _create_and_save_summary(
self, storyboard_df: "pd.DataFrame", output_dir: Path, options: dict
self,
storyboard_df: "pd.DataFrame",
output_dir: Path,
options: dict,
) -> "pd.DataFrame":
df = storyboard_df.copy()
@@ -234,8 +303,21 @@
for component in status_components[1:]:
df['status'] = df['status'] + "_" + component
# --- MODIFICATION START ---
# Detect segment changes based on status AND large TTAG jumps.
df['status_changed'] = df['status'].ne(df['status'].shift())
# A TTAG jump is considered a change of segment.
# A jump larger than 1,000,000 ticks (~64 seconds) indicates a data anomaly or a
# significant time gap between recordings, and should be treated as a new segment.
TICK_JUMP_THRESHOLD = 1_000_000
ttag_diff = df['TTAG'].diff()
df['ttag_jump'] = ttag_diff.abs() > TICK_JUMP_THRESHOLD
# A new segment starts if the status changes OR if there's a TTAG jump.
df['status_changed'] = df['status_changed'] | df['ttag_jump']
# --- MODIFICATION END ---
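The threshold arithmetic and the resulting flags, on invented TTAG values where the last row jumps 1,200,000 ticks (about 76.8 s):

import pandas as pd

TICK_DURATION_S = 64e-6          # one tick = 64 microseconds
TICK_JUMP_THRESHOLD = 1_000_000  # 1_000_000 ticks * 64e-6 s/tick = 64 s

df = pd.DataFrame({"TTAG": [0, 10_000, 20_000, 1_220_000]})
ttag_jump = df["TTAG"].diff().abs() > TICK_JUMP_THRESHOLD
print(ttag_jump.tolist())  # [False, False, False, True]: a new segment starts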
min_ttag = df['TTAG'].min()
df['flight_time_s'] = (df['TTAG'] - min_ttag) * TICK_DURATION_S
@@ -294,12 +376,12 @@
f.write(f" FLIGHT ANALYSIS REPORT - {self.current_flight_name} \n")
f.write("=" * 80 + "\n\n")
f.write("--- FLIGHT OVERVIEW ---\n")
f.write(f"Total Duration: {total_duration:.2f} seconds\n")
f.write(f"Total Batches: {total_batches}\n")
f.write(f"Total Segments: {num_segments}\n\n")
f.write("--- FLIGHT OVERVIEW ---\\n")
f.write(f"Total Duration: {total_duration:.2f} seconds\\n")
f.write(f"Total Batches: {total_batches}\\n")
f.write(f"Total Segments: {num_segments}\\n\n")
f.write("--- SEGMENT SUMMARY ---\n")
f.write("--- SEGMENT SUMMARY ---\\n")
report_df = summary_df.copy()
report_df['Duration (s)'] = report_df['Duration (s)'].map('{:.2f}'.format)