add multisegment analysis, fix truncated filenames in g_reconverter report

VALLONGOL 2025-09-17 08:30:30 +02:00
parent d12cca39aa
commit 73e5bb8aa2
3 changed files with 168 additions and 78 deletions

View File

@@ -3,7 +3,7 @@
"last_opened_rec_file": "C:/src/____GitProjects/radar_data_reader/_rec/_25-05-15-12-22-52_sata_345.rec",
"last_out_output_dir": "C:\\src\\____GitProjects\\radar_data_reader\\out_analisys",
"last_rec_output_dir": "C:\\src\\____GitProjects\\radar_data_reader\\_rec",
"last_flight_folder": "C:/__Voli/Volo_12_25maggio2025/rec/rec",
"last_flight_folder": "C:/__Voli/_t_bay_scan",
"last_flight_workspace_parent_dir": "C:/src/____GitProjects/radar_data_reader/flight_workspace/",
"active_out_export_profile_name": "trackingdata",
"export_profiles": [

View File

@@ -83,11 +83,19 @@ class ExportManager:
if not exe_path or not Path(exe_path).is_file():
raise ValueError(f"g_reconverter executable not found at: {exe_path}")
# At this point, job.start_file and job.end_file should be correct
first_rec_path = job.rec_folder / job.start_file
# --- NEW ROBUST LOGIC ---
# The filename from the summary can be truncated in various ways (.re, or no extension).
# First, reconstruct the full, correct filename for finding the file on disk.
start_file_base = job.start_file.split('.')[0]
start_filename_full = start_file_base + '.rec'
first_rec_path = job.rec_folder / start_filename_full
# Second, use a flexible regex to extract the sequence number from the (possibly truncated) name.
# This regex handles .rec, .re, or no extension.
start_num_match = re.search(r"_(\d+)(\.re(c?)?)?$", job.start_file)
end_num_match = re.search(r"_(\d+)(\.re(c?)?)?$", job.end_file)
# --- END NEW LOGIC ---
start_num_match = re.search(r"_(\d+)\.rec$", job.start_file)
end_num_match = re.search(r"_(\d+)\.rec$", job.end_file)
if not start_num_match or not end_num_match:
log.error(
f"Could not parse sequence number from filenames: '{job.start_file}', '{job.end_file}'"

View File

@@ -13,6 +13,7 @@ import re
from pathlib import Path
from typing import List, Optional
from datetime import timedelta
from collections import defaultdict
try:
import pandas as pd
@@ -27,6 +28,7 @@ log = logger.get_logger(__name__)
TICK_DURATION_S = 64e-6
class FlightAnalyzer:
"""Manages the multi-step process of analyzing a flight folder."""
@@ -39,7 +41,11 @@ class FlightAnalyzer:
self.analysis_options: dict = {}
def start_analysis(
self, rec_folder_str: str, flight_name: str, workspace_path: Path, analysis_options: dict
self,
rec_folder_str: str,
flight_name: str,
workspace_path: Path,
analysis_options: dict,
) -> threading.Thread:
self.current_flight_name = flight_name
self.analysis_options = analysis_options
@@ -52,6 +58,11 @@
return analysis_thread
def _flight_analysis_orchestrator(self, rec_folder_str: str, flight_name: str, flight_dir: Path):
"""
Orchestrates the entire flight analysis process.
This method now groups .rec files by session, runs the C++ analyzer for each group,
and then combines the results for final processing.
"""
self.current_flight_folder_path = None
try:
flight_dir.mkdir(parents=True, exist_ok=True)
@@ -60,85 +71,115 @@
cpp_config = self.config_manager.get_cpp_converter_config()
exe_path = cpp_config.get("cpp_executable_path")
if not exe_path or not Path(exe_path).is_file():
raise ValueError(
f"C++ executable not found at path: {exe_path}"
)
raise ValueError(f"C++ executable not found at path: {exe_path}")
rec_files = sorted(Path(rec_folder_str).glob("*.rec"))
if not rec_files:
raise FileNotFoundError("No .rec files found in the specified folder.")
first_rec_path = rec_files[0]
num_files_to_process = len(rec_files)
# Group files by a common base name to handle multiple recording sessions.
# The base name is determined by splitting the filename before the last underscore,
# which typically separates the descriptive part from the sequence number.
file_groups = defaultdict(list)
for f in rec_files:
base_name = f.name.rsplit('_', 1)[0]
file_groups[base_name].append(f)
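As a quick illustration of the grouping step, a self-contained sketch with invented filenames following the '<session>_<sequence>.rec' pattern the rsplit call assumes:

from collections import defaultdict
from pathlib import Path

rec_files = [Path(n) for n in
             ("bay_scan_1.rec", "bay_scan_2.rec", "sata_1.rec", "sata_2.rec")]
file_groups = defaultdict(list)
for f in rec_files:
    # Everything before the last underscore identifies the recording session.
    file_groups[f.name.rsplit('_', 1)[0]].append(f)

print({k: [p.name for p in v] for k, v in file_groups.items()})
# {'bay_scan': ['bay_scan_1.rec', 'bay_scan_2.rec'], 'sata': ['sata_1.rec', 'sata_2.rec']}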
command_list = [
str(exe_path),
str(first_rec_path),
f"/n={num_files_to_process}",
"/p=1",
"/a",
'/vsave',
'/vshow',
]
log.info(f"Found {len(rec_files)} .rec files, grouped into {len(file_groups)} recording session(s).")
log.info(f"Running g_reconverter for full analysis: {' '.join(command_list)}")
all_storyboard_dfs = []
for group_name, group_files in file_groups.items():
log.info(f"Processing group '{group_name}' with {len(group_files)} file(s)...")
first_rec_path = group_files[0]
num_files_to_process = len(group_files)
self.worker_process = mp.Process(
target=run_cpp_converter,
args=(command_list, self.result_queue, str(flight_dir), True),
daemon=True,
)
self.worker_process.start()
self.worker_process.join()
command_list = [
str(exe_path),
str(first_rec_path),
f"/n={num_files_to_process}",
"/p=1",
"/a",
'/vsave',
'/vshow',
]
log.info("g_reconverter full analysis process finished.")
self.result_queue.put({"type": "cpp_complete"})
log.info(f"Running g_reconverter for group '{group_name}': {' '.join(command_list)}")
except Exception as e:
log.error(f"Flight analysis orchestrator failed: {e}", exc_info=True)
self.result_queue.put({"type": "error", "message": str(e)})
# Run the C++ process for the current group.
# This is a blocking call to ensure sequential processing of groups.
self.worker_process = mp.Process(
target=run_cpp_converter,
args=(command_list, self.result_queue, str(flight_dir), True),
daemon=True,
)
self.worker_process.start()
self.worker_process.join()
log.info(f"C++ analysis finished for group '{group_name}'.")
def handle_final_analysis_steps(self):
if not self.current_flight_folder_path:
log.error("Cannot run final analysis steps: flight folder path is not set.")
self.result_queue.put({"type": "error", "message": "Internal state error: flight folder path missing."})
return
# After the C++ process completes, parse its output.
# The output file is expected to be in the flight_dir.
summary_files = [f for f in flight_dir.glob("pp-*.txt") if "aesa" not in f.name.lower()]
if not summary_files:
log.warning(f"No summary file found for group '{group_name}'. Skipping.")
continue
try:
log.info("C++ part complete. Starting Python-side analysis...")
all_txt_files = list(self.current_flight_folder_path.glob("pp-*.txt"))
summary_files = [f for f in all_txt_files if "aesa" not in f.name.lower()]
if not summary_files:
raise FileNotFoundError("Main summary file not found after analysis.")
summary_txt_path = summary_files[0]
log.info(f"Parsing summary file for group: {summary_txt_path.name}")
storyboard_df_group = self._parse_storyboard_from_txt(summary_txt_path)
if storyboard_df_group is not None and not storyboard_df_group.empty:
all_storyboard_dfs.append(storyboard_df_group)
# Clean up the output file to prevent it from being used by the next iteration.
try:
summary_txt_path.unlink()
except OSError as e:
log.warning(f"Could not delete temporary summary file {summary_txt_path}: {e}")
summary_txt_path = summary_files[0]
log.info(f"Found main summary file: {summary_txt_path.name}")
storyboard_df = self._parse_and_save_storyboard(
summary_txt_path, self.current_flight_folder_path
)
if storyboard_df is None or storyboard_df.empty:
raise ValueError("Parsing storyboard failed or resulted in empty data.")
if not all_storyboard_dfs:
raise ValueError("Analysis failed: No storyboard data could be parsed from any file group.")
summary_df = self._create_and_save_summary(
storyboard_df, self.current_flight_folder_path, self.analysis_options
)
# Combine all dataframes from all groups into one.
log.info(f"Combining {len(all_storyboard_dfs)} storyboard segment(s) into a single flight storyboard.")
storyboard_df = pd.concat(all_storyboard_dfs, ignore_index=True)
self._create_flight_report_txt(summary_df, self.current_flight_folder_path)
# Sort the combined dataframe by batch number to ensure correct chronological order.
storyboard_df.sort_values(by="Batch", inplace=True)
storyboard_df.reset_index(drop=True, inplace=True)
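A small sketch of the combine-and-sort step with invented Batch/TTAG values, showing how out-of-order segments end up chronological:

import pandas as pd

seg_late = pd.DataFrame({"Batch": [3, 4], "TTAG": [3_000, 4_000]})
seg_early = pd.DataFrame({"Batch": [1, 2], "TTAG": [1_000, 2_000]})

combined = pd.concat([seg_late, seg_early], ignore_index=True)
combined.sort_values(by="Batch", inplace=True)
combined.reset_index(drop=True, inplace=True)
print(combined["Batch"].tolist())  # [1, 2, 3, 4]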
# --- Start of final analysis steps (previously in handle_final_analysis_steps) ---
log.info("C++ processing complete for all groups. Starting final Python-side analysis...")
# Save the final, combined storyboard.
self._save_storyboard_artifacts(storyboard_df, flight_dir)
summary_df = self._create_and_save_summary(storyboard_df, flight_dir, self.analysis_options)
self._create_flight_report_txt(summary_df, flight_dir)
self.result_queue.put({
"type": "analysis_summary_data",
"type": "analysis_summary_data",
"data": summary_df,
"flight_folder_path": self.current_flight_folder_path
"flight_folder_path": flight_dir
})
log.info("Flight analysis complete. All artifacts saved.")
self.result_queue.put({"type": "complete", "message": "Analysis successful."})
except Exception as e:
log.error(f"Final analysis steps failed: {e}", exc_info=True)
log.error(f"Flight analysis orchestrator failed: {e}", exc_info=True)
self.result_queue.put({"type": "error", "message": str(e)})
def handle_final_analysis_steps(self):
"""
This method is now a placeholder.
The logic has been moved into _flight_analysis_orchestrator to ensure
sequential execution after all C++ processes are complete.
"""
log.info("Final analysis steps are now integrated into the main orchestrator.")
pass
def _make_columns_unique(self, columns: List[str]) -> List[str]:
final_cols, counts = [], {}
for col in columns:
@@ -150,9 +191,8 @@ class FlightAnalyzer:
final_cols.append(col)
return final_cols
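The middle of _make_columns_unique falls outside this hunk. One plausible completion, consistent with the visible final_cols/counts lines, suffixes repeated headers with a running counter (an assumption, not the committed code):

from typing import List

def make_columns_unique(columns: List[str]) -> List[str]:
    # Assumed body: suffix repeated names so pandas receives unique labels.
    final_cols, counts = [], {}
    for col in columns:
        if col in counts:
            counts[col] += 1
            col = f"{col}_{counts[col]}"
        else:
            counts[col] = 0
        final_cols.append(col)
    return final_cols

print(make_columns_unique(["TTAG", "file", "TTAG"]))  # ['TTAG', 'file', 'TTAG_1']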
def _parse_and_save_storyboard(
self, txt_path: Path, output_dir: Path
) -> Optional["pd.DataFrame"]:
def _parse_storyboard_from_txt(self, txt_path: Path) -> Optional["pd.DataFrame"]:
"""Parses a storyboard TXT file into a pandas DataFrame."""
if pd is None:
log.error("Pandas library is not installed, cannot parse storyboard.")
return None
@@ -163,10 +203,10 @@
unique_column_names = self._make_columns_unique(raw_columns)
storyboard_df = pd.read_csv(
txt_path,
sep=';',
header=0,
names=unique_column_names,
on_bad_lines='skip',
encoding='utf-8',
encoding_errors='ignore'
@@ -174,7 +214,15 @@
for col in storyboard_df.select_dtypes(include=['object']).columns:
storyboard_df[col] = storyboard_df[col].str.strip()
# --- MODIFICATION START ---
# Correct truncated .re filenames from the C++ tool's output to .rec
if 'file' in storyboard_df.columns:
storyboard_df['file'] = storyboard_df['file'].apply(
lambda x: x + 'c' if isinstance(x, str) and x.endswith('.re') else x
)
# --- MODIFICATION END ---
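A quick check of the truncation fix with invented values: only strings ending in '.re' gain the missing 'c', while complete names and non-strings pass through untouched.

import pandas as pd

files = pd.Series(["run_1.rec", "run_2.re", None])
fixed = files.apply(
    lambda x: x + 'c' if isinstance(x, str) and x.endswith('.re') else x
)
print(fixed.tolist())  # ['run_1.rec', 'run_2.rec', None]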
numeric_cols = ["Batch", "TTAG"]
for col in numeric_cols:
if col in storyboard_df.columns:
@@ -184,27 +232,48 @@
storyboard_df["Batch"] = storyboard_df["Batch"].astype(int)
storyboard_df["TTAG"] = storyboard_df["TTAG"].astype(int)
if storyboard_df.empty:
log.warning(f"DataFrame is empty after cleaning {txt_path.name}")
return None
return storyboard_df
except Exception as e:
log.error(f"Failed to read or process summary file {txt_path.name}: {e}")
return None
def _save_storyboard_artifacts(self, storyboard_df: "pd.DataFrame", output_dir: Path):
"""Saves the final storyboard DataFrame to CSV and JSON."""
csv_path = output_dir / "flight_storyboard.csv"
json_path = output_dir / "flight_storyboard.json"
log.info(f"Saving full storyboard to {csv_path}")
log.info(f"Saving final combined storyboard to {csv_path}")
storyboard_df.to_csv(csv_path, index=False)
log.info(f"Saving full storyboard to {json_path}")
log.info(f"Saving final combined storyboard to {json_path}")
storyboard_df.to_json(json_path, orient="records", indent=4)
def _parse_and_save_storyboard(
self,
txt_path: Path,
output_dir: Path,
) -> Optional["pd.DataFrame"]:
"""
Parses a storyboard file and saves it to CSV and JSON.
This method now uses helper functions for parsing and saving.
"""
storyboard_df = self._parse_storyboard_from_txt(txt_path)
if storyboard_df is None or storyboard_df.empty:
return None
self._save_storyboard_artifacts(storyboard_df, output_dir)
return storyboard_df
def _create_and_save_summary(
self, storyboard_df: "pd.DataFrame", output_dir: Path, options: dict
self,
storyboard_df: "pd.DataFrame",
output_dir: Path,
options: dict,
) -> "pd.DataFrame":
df = storyboard_df.copy()
@@ -234,8 +303,21 @@
for component in status_components[1:]:
df['status'] = df['status'] + "_" + component
# --- MODIFICATION START ---
# Detect segment changes based on status AND large TTAG jumps.
df['status_changed'] = df['status'].ne(df['status'].shift())
# A TTAG jump is considered a change of segment.
# A jump larger than 1,000,000 ticks (~64 seconds) indicates a data anomaly or a
# significant time gap between recordings, and should be treated as a new segment.
TICK_JUMP_THRESHOLD = 1_000_000
ttag_diff = df['TTAG'].diff()
df['ttag_jump'] = ttag_diff.abs() > TICK_JUMP_THRESHOLD
# A new segment starts if the status changes OR if there's a TTAG jump.
df['status_changed'] = df['status_changed'] | df['ttag_jump']
# --- MODIFICATION END ---
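The threshold arithmetic and the resulting flags, on invented TTAG values where the last row jumps 1,200,000 ticks (about 76.8 s):

import pandas as pd

TICK_DURATION_S = 64e-6          # one tick = 64 microseconds
TICK_JUMP_THRESHOLD = 1_000_000  # 1_000_000 ticks * 64e-6 s/tick = 64 s

df = pd.DataFrame({"TTAG": [0, 10_000, 20_000, 1_220_000]})
ttag_jump = df["TTAG"].diff().abs() > TICK_JUMP_THRESHOLD
print(ttag_jump.tolist())  # [False, False, False, True]: a new segment starts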
min_ttag = df['TTAG'].min()
df['flight_time_s'] = (df['TTAG'] - min_ttag) * TICK_DURATION_S
@@ -294,12 +376,12 @@
f.write(f" FLIGHT ANALYSIS REPORT - {self.current_flight_name} \n")
f.write("=" * 80 + "\n\n")
f.write("--- FLIGHT OVERVIEW ---\n")
f.write(f"Total Duration: {total_duration:.2f} seconds\n")
f.write(f"Total Batches: {total_batches}\n")
f.write(f"Total Segments: {num_segments}\n\n")
f.write("--- FLIGHT OVERVIEW ---\\n")
f.write(f"Total Duration: {total_duration:.2f} seconds\\n")
f.write(f"Total Batches: {total_batches}\\n")
f.write(f"Total Segments: {num_segments}\\n\n")
f.write("--- SEGMENT SUMMARY ---\n")
f.write("--- SEGMENT SUMMARY ---\\n")
report_df = summary_df.copy()
report_df['Duration (s)'] = report_df['Duration (s)'].map('{:.2f}'.format)