# SXXXXXXX_RadarDataReader/extract_cdpsts.py

# extract_cdpsts.py
import numpy as np
import sys
from pathlib import Path

# --- File-format constants (derived from our analysis of the data) ---

# 32-bit word value that marks a "legacy" block; CDPSTS blocks are legacy blocks.
LEGACY_BLOCK_MARKER = 0x5A5A5A5A

# Name ID of a CDPSTS block (from BLOCK_TYPE_MAP). Stored as a little-endian
# 32-bit word this is the ASCII bytes "CDPS" (decimal 1397769283).
CDPSTS_BLOCK_ID = 0x53504443

# Positions of the header fields, counted in 32-bit words from the first
# marker word of a legacy block.
LEGACY_SIZE_OFFSET_WORDS = 5
LEGACY_NAME_OFFSET_WORDS = 17


def extract_first_cdpsts_block(input_file: Path, output_file: Path):
    """
    Find the first CDPSTS block in the input file and write it to the output file.

    The input is read as a flat array of little-endian 32-bit words. A legacy
    block is considered to start at any word index where two consecutive words
    equal ``LEGACY_BLOCK_MARKER``. Such a block is a CDPSTS block when the word
    ``LEGACY_NAME_OFFSET_WORDS`` past its start equals ``CDPSTS_BLOCK_ID``.

    The extracted block runs from its first marker word up to (but not
    including) the next double-marker position. When the CDPSTS block is the
    last legacy block in the file there is no such boundary, so the byte count
    stored at ``LEGACY_SIZE_OFFSET_WORDS`` is used as a best-effort length
    (rounded up to whole words and clamped to the end of the file).

    Progress and all expected failures (missing input, unreadable file, no
    markers, no CDPSTS block) are reported on stdout; in those cases nothing
    is written.

    Args:
        input_file: Path of the binary file to scan.
        output_file: Path the raw bytes of the extracted block are written to
            (overwritten if it exists).

    Returns:
        None.
    """
    if not input_file.is_file():
        print(f"Error: Input file not found at '{input_file}'")
        return

    print(f"Loading data from '{input_file}'...")
    try:
        data_vector = np.fromfile(str(input_file), dtype="<u4")
    except (OSError, ValueError, MemoryError) as e:
        print(f"Error loading file: {e}")
        return
    print(f"Loaded {data_vector.size} 32-bit words.")

    print(f"Searching for legacy block marker (0x{LEGACY_BLOCK_MARKER:X})...")
    legacy_indices = np.where(data_vector == LEGACY_BLOCK_MARKER)[0]

    # A valid block starts with TWO consecutive markers: keep every marker
    # index whose successor in the (ascending) hit list is exactly one word on.
    # NOTE(review): a run of three or more marker words yields overlapping
    # candidate starts; confirm whether the format can contain such runs.
    legacy_starts = legacy_indices[:-1][np.diff(legacy_indices) == 1]

    # Test for emptiness with .size, not .any(): .any() looks at the index
    # *values*, so a lone block starting at word 0 would be treated as "none".
    if legacy_starts.size == 0:
        print("No legacy blocks with double markers found.")
        return

    print(
        f"Found {len(legacy_starts)} potential legacy blocks. Searching for CDPSTS..."
    )

    total_words = int(data_vector.size)
    # Furthest header word that has to be readable for a candidate block.
    header_span_words = max(LEGACY_NAME_OFFSET_WORDS, LEGACY_SIZE_OFFSET_WORDS)

    for position, raw_start in enumerate(legacy_starts):
        # Plain Python ints keep the offset arithmetic free of fixed-width
        # NumPy scalar wrap-around.
        start_index = int(raw_start)

        if start_index + header_span_words >= total_words:
            # A marker this close to the end of the file has no complete
            # header. Candidate starts are in ascending order, so no later
            # candidate can fit either.
            break

        # Check if it's a CDPSTS block by looking at the name ID.
        if data_vector[start_index + LEGACY_NAME_OFFSET_WORDS] != CDPSTS_BLOCK_ID:
            continue

        print(f"Found a CDPSTS block at word offset: {start_index}")

        # Preferred block end: the start of the next legacy block.
        later_starts = legacy_starts[position + 1:]
        if later_starts.size:
            end_index = int(later_starts[0])
        else:
            print("Found last CDPSTS block in file, cannot determine exact size.")
            # Fallback: treat the header's byte count as the full block size.
            # This is likely not exact (the field may describe only the
            # payload), but it is the best information available here.
            block_size_bytes = int(
                data_vector[start_index + LEGACY_SIZE_OFFSET_WORDS]
            )
            block_size_words = (block_size_bytes + 3) // 4
            # Clamp so the reported range matches what is actually written.
            end_index = min(start_index + block_size_words, total_words)

        print(f"Extracting block from word {start_index} to {end_index}.")
        block_data = data_vector[start_index:end_index]

        print(f"Writing {block_data.size * 4} bytes to '{output_file}'...")
        with open(output_file, "wb") as f:
            f.write(block_data.tobytes())

        print("Extraction complete.")
        return  # Stop after the first one

    print("No CDPSTS blocks were found in the file.")


if __name__ == "__main__":
    # Command-line entry point: exactly one positional argument, the input
    # file. The extracted block always goes to a fixed file name in the
    # current working directory.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python extract_cdpsts.py <path_to_your_file.out>")
        sys.exit(1)

    extract_first_cdpsts_block(
        Path(cli_args[0]),
        Path("cdpsts_block_sample.out"),
    )