SXXXXXXX_Radalyze/radalyze/core/file_io.py
2025-06-13 14:13:29 +02:00

113 lines
4.1 KiB
Python

import csv
import numpy as np
from typing import Optional
def load_vector_from_csv(file_path: str) -> np.ndarray:
    """
    Load a single-column vector from a CSV file.

    Only the first field of each row is read; any additional fields on a
    row are ignored. Rows that are empty or contain nothing but whitespace
    are skipped.

    Args:
        file_path: The path to the CSV file.

    Returns:
        A one-dimensional NumPy array with one float per data row.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If a value cannot be converted to float or if the file
            contains no data rows.
        IOError: For any other error raised while reading the file.
    """
    data = []
    try:
        with open(file_path, 'r', newline='') as file:
            for row in csv.reader(file):
                # A blank line parses as [] and a whitespace-only line as
                # [' ']; neither carries a value, so both are skipped.
                if not any(cell.strip() for cell in row):
                    continue
                data.append(float(row[0]))
    except FileNotFoundError:
        # Let the caller distinguish a missing file from malformed content.
        raise
    except ValueError as e:
        # Chain the cause so the offending conversion stays in the traceback.
        raise ValueError(f"Error converting data to float in {file_path}: {e}") from e
    except Exception as e:
        # Anything else (permissions, csv.Error, ...) is reported as an I/O error.
        raise IOError(f"An unexpected error occurred while reading {file_path}: {e}") from e

    if not data:
        raise ValueError(f"No data found in file: {file_path}")
    return np.array(data)
def _read_dimension_header(reader, label: str) -> int:
    """Read one '<label>,<integer>' header row from reader and return the integer."""
    header = next(reader, None)
    if header is None:
        # Raise a descriptive error instead of letting StopIteration escape
        # with an empty message.
        raise ValueError(f"Missing '{label}' header: unexpected end of file")
    if len(header) != 2 or header[0].strip().lower() != label:
        raise ValueError(f"Invalid header format: Expected '{label}, <value>'")
    return int(header[1])


def load_matrix_from_csv(file_path: str) -> np.ndarray:
    """
    Load a matrix from a custom CSV format.

    The expected format is:
    - Row 1: "rows",<number_of_rows>
    - Row 2: "cols",<number_of_cols>
    - Subsequent rows: Matrix data, comma-separated.

    Blank (empty or whitespace-only) lines in the data section are skipped.
    Data rows beyond the declared row count are ignored without being
    validated.

    Args:
        file_path: The path to the CSV file.

    Returns:
        A NumPy array of floats with the declared number of rows and columns.

    Raises:
        FileNotFoundError: If the specified file does not exist.
        ValueError: If the file format is incorrect, dimensions are invalid,
            or data cannot be converted to float.
        IOError: For any other error raised while reading the file.
    """
    try:
        with open(file_path, 'r', newline='') as file:
            reader = csv.reader(file)

            num_rows = _read_dimension_header(reader, 'rows')
            num_cols = _read_dimension_header(reader, 'cols')
            if num_rows <= 0 or num_cols <= 0:
                raise ValueError("Matrix dimensions must be positive.")

            data = []
            for row in reader:
                if len(data) >= num_rows:
                    # Surplus rows are ignored.
                    # Optional: Add a warning here later with the logger
                    break
                # Skip blank lines, as the vector loader does for empty rows.
                if not any(cell.strip() for cell in row):
                    continue
                float_row = [float(x) for x in row]
                if len(float_row) != num_cols:
                    # The reported index is the 0-based position among data rows.
                    raise ValueError(f"Row {len(data)} has incorrect number of columns. "
                                     f"Expected {num_cols}, found {len(float_row)}.")
                data.append(float_row)

            if len(data) != num_rows:
                raise ValueError("Incorrect number of data rows. "
                                 f"Expected {num_rows}, found {len(data)}.")
            return np.array(data)
    except FileNotFoundError:
        # Let the caller distinguish a missing file from malformed content.
        raise
    except ValueError as e:
        # Covers header/format problems and failed int/float conversions;
        # chain the cause so the original error stays in the traceback.
        raise ValueError(f"Invalid file format or data in {file_path}: {e}") from e
    except Exception as e:
        raise IOError(f"An unexpected error occurred while reading {file_path}: {e}") from e