127 lines
5.3 KiB
Python
127 lines
5.3 KiB
Python
# extract_cdpsts.py
|
|
import numpy as np
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# --- Configuration based on our analysis ---

# Sentinel word that opens a "legacy" block; CDPSTS blocks are legacy blocks.
LEGACY_BLOCK_MARKER = 0x5A5A5A5A

# Name ID of the CDPSTS block (value taken from BLOCK_TYPE_MAP).
# 0x53504443 == 1397769283, i.e. the ASCII bytes "CDPS" read as a
# little-endian 32-bit word.
CDPSTS_BLOCK_ID = 0x53504443

# Offsets, in 32-bit words, of fields within the legacy block header.
LEGACY_SIZE_OFFSET_WORDS = 5
LEGACY_NAME_OFFSET_WORDS = 17
|
|
|
|
|
|
def extract_first_cdpsts_block(input_file: Path, output_file: Path):
    """Find the first CDPSTS block in ``input_file`` and write it to ``output_file``.

    The input is read as a flat vector of little-endian 32-bit words. A legacy
    block is taken to start at the first of two consecutive
    ``LEGACY_BLOCK_MARKER`` words. A block is a CDPSTS block when the word at
    ``LEGACY_NAME_OFFSET_WORDS`` past its start equals ``CDPSTS_BLOCK_ID``.

    The extracted block runs from its first marker word up to (but not
    including) the start of the next legacy block. For the last legacy block
    in the file the byte size stored at ``LEGACY_SIZE_OFFSET_WORDS`` is used
    instead, rounded up to whole words and clamped to the end of the file.

    Progress and error messages are printed to stdout; nothing is raised for
    a missing input file, an unreadable file, or a file without CDPSTS blocks.

    Args:
        input_file: Path of the binary file to scan.
        output_file: Path the raw bytes of the extracted block are written to.

    Returns:
        None.
    """
    if not input_file.is_file():
        print(f"Error: Input file not found at '{input_file}'")
        return

    print(f"Loading data from '{input_file}'...")
    try:
        data_vector = np.fromfile(str(input_file), dtype="<u4")
    except Exception as e:
        print(f"Error loading file: {e}")
        return
    print(f"Loaded {data_vector.size} 32-bit words.")

    print(f"Searching for legacy block marker (0x{LEGACY_BLOCK_MARKER:X})...")
    legacy_indices = np.where(data_vector == LEGACY_BLOCK_MARKER)[0]

    # A valid block starts with TWO consecutive markers: keep every marker
    # position whose successor in the match list is exactly one word later.
    legacy_starts = legacy_indices[:-1][np.diff(legacy_indices) == 1]

    # Test the element count, not `.any()`: `.any()` inspects the *values*,
    # so a single block starting at word 0 would look like "no blocks".
    if legacy_starts.size == 0:
        print("No legacy blocks with double markers found.")
        return

    print(
        f"Found {len(legacy_starts)} potential legacy blocks. Searching for CDPSTS..."
    )

    # Plain Python ints from here on, so index and size arithmetic cannot
    # wrap around the way fixed-width NumPy scalars can.
    starts = legacy_starts.tolist()
    total_words = int(data_vector.size)
    header_reach = max(LEGACY_NAME_OFFSET_WORDS, LEGACY_SIZE_OFFSET_WORDS)

    for position, start_index in enumerate(starts):
        if start_index + header_reach >= total_words:
            # Marker pair too close to the end of the file to hold the header
            # fields. `starts` is ascending, so all later ones are too.
            break

        # Check if it's a CDPSTS block by looking at the name ID.
        name_id = int(data_vector[start_index + LEGACY_NAME_OFFSET_WORDS])
        if name_id != CDPSTS_BLOCK_ID:
            continue

        print(f"Found a CDPSTS block at word offset: {start_index}")

        # The meaning of the header size field (payload only vs. whole block)
        # is not settled, so the start of the next legacy block is the
        # preferred end boundary. The header size is only a fallback.
        if position + 1 < len(starts):
            end_index = starts[position + 1]
        else:
            print("Found last CDPSTS block in file, cannot determine exact size.")
            # Fallback: treat the header size (bytes) as the full block size.
            # This is likely inexact, but better than extracting nothing.
            block_size_bytes = int(
                data_vector[start_index + LEGACY_SIZE_OFFSET_WORDS]
            )
            block_size_words = (block_size_bytes + 3) // 4
            # Clamp so the reported range matches what is actually written.
            end_index = min(start_index + block_size_words, total_words)

        print(f"Extracting block from word {start_index} to {end_index}.")
        block_data = data_vector[start_index:end_index]

        print(f"Writing {block_data.size * 4} bytes to '{output_file}'...")
        with open(output_file, "wb") as f:
            f.write(block_data.tobytes())

        print("Extraction complete.")
        return  # Stop after the first one

    print("No CDPSTS blocks were found in the file.")
|
|
|
|
|
|
if __name__ == "__main__":
    # Exactly one positional argument is expected: the file to scan.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python extract_cdpsts.py <path_to_your_file.out>")
        sys.exit(1)

    # The extracted block always goes to a fixed file in the working directory.
    extract_first_cdpsts_block(
        Path(cli_args[0]),
        Path("cdpsts_block_sample.out"),
    )
|