# SXXXXXXX_PyHasher/pyhasher/core/core.py

# pyhasher/core/core.py

import hashlib
import zlib
from typing import Optional

# Number of bytes requested from the file on each read.
# Reading in fixed-size chunks lets large files be processed without
# loading them entirely into memory.
BUFFER_SIZE = 64 * 1024  # 64 KiB

def calculate_hashes_for_file(
    file_path: str, *, buffer_size: Optional[int] = None
) -> dict:
    """
    Calculates various hashes for a given file.

    The file is opened in binary mode and read once, in chunks of
    ``buffer_size`` bytes. Every chunk is fed to all checksums and hashers,
    so the whole file is never held in memory at once.

    Includes checksums (CRC32, Adler-32) and cryptographic hashes
    (MD5, SHA-1, SHA-256, SHA-384, SHA-512, SHA3-256, BLAKE2b).

    Args:
        file_path: Path to the file to hash (passed straight to ``open``).
        buffer_size: Number of bytes to read per chunk. ``None`` (the
            default) uses the module-level ``BUFFER_SIZE``, looked up at
            call time. Must be positive.

    Returns:
        A dictionary mapping algorithm name to its lowercase hexadecimal
        digest. The two checksums come first, zero-padded to 8 hex digits.
        Example: {'CRC32': '...', 'Adler-32': '...', 'MD5': '...', ...}

    Raises:
        ValueError: If ``buffer_size`` is not positive.
        OSError: If the file cannot be opened or read (``IOError`` is an
            alias of ``OSError``).
    """
    if buffer_size is None:
        buffer_size = BUFFER_SIZE
    # read(0) returns b"" immediately, which would look like EOF and yield the
    # hashes of empty input; a negative size reads the whole file in one go.
    if buffer_size <= 0:
        raise ValueError(f"buffer_size must be positive, got {buffer_size!r}")

    # Running checksum values; each zlib call takes the previous value.
    crc32_val = 0
    adler32_val = 1  # Adler-32 starts with a value of 1

    # One incremental hasher per algorithm, keyed by its result name.
    hashers = {
        "MD5": hashlib.md5(),
        "SHA-1": hashlib.sha1(),
        "SHA-256": hashlib.sha256(),
        "SHA-384": hashlib.sha384(),
        "SHA-512": hashlib.sha512(),
        "SHA3-256": hashlib.sha3_256(),
        "BLAKE2b": hashlib.blake2b(),
    }

    with open(file_path, "rb") as f:
        # iter() with a sentinel keeps calling f.read until it returns b""
        # (end of file).
        for chunk in iter(lambda: f.read(buffer_size), b""):
            for hasher in hashers.values():
                hasher.update(chunk)

            crc32_val = zlib.crc32(chunk, crc32_val)
            adler32_val = zlib.adler32(chunk, adler32_val)

    # Checksums first, then the cryptographic digests in declaration order.
    results = {
        "CRC32": f"{crc32_val:08x}",
        "Adler-32": f"{adler32_val:08x}",
    }
    results.update(
        (name, hasher.hexdigest()) for name, hasher in hashers.items()
    )
    return results