SXXXXXXX_CodeBridge/codebridge/core/comparer.py
2025-12-23 09:31:38 +01:00

88 lines
3.0 KiB
Python

import fnmatch
import hashlib
import os
from typing import Dict, List, Set, Tuple
class CodeComparer:
    """
    Compares two directory trees and reports added, modified and deleted files.

    Files and directories whose name matches an entry of ``IGNORE_PATTERNS``
    are skipped. Results are expressed as paths relative to the compared
    roots and are stored on the instance (``added``, ``modified``,
    ``deleted``) as well as returned by :meth:`compare`.
    """

    # Names or shell-style glob patterns (``*``, ``?``, ``[seq]``) to skip.
    # Matching is done against the bare file/directory name, not the path,
    # and is case-insensitive so that e.g. "Thumbs.db" is caught as well.
    IGNORE_PATTERNS: Set[str] = {
        ".svn",
        ".git",
        "__pycache__",
        ".pytest_cache",
        ".vscode",
        ".idea",
        "*.pyc",
        ".DS_Store",
        "thumbs.db",
    }

    # Number of bytes read per iteration while hashing a file.
    _HASH_CHUNK_SIZE = 64 * 1024

    def __init__(self, source_path: str, destination_path: str):
        """
        Stores the two roots to compare and initialises empty result lists.

        Args:
            source_path: Root directory of the source tree.
            destination_path: Root directory of the destination tree.
        """
        self.source_path = source_path
        self.destination_path = destination_path
        self.added: List[str] = []
        self.modified: List[str] = []
        self.deleted: List[str] = []

    def _is_ignored(self, name: str) -> bool:
        """
        Checks if a file or directory name matches the ignore patterns.

        Every entry of ``IGNORE_PATTERNS`` is treated as a glob, so plain
        names match exactly and wildcard entries such as ``*.pyc`` work
        without a dedicated special case.

        Args:
            name: Bare file or directory name (no path components).

        Returns:
            True when the name matches at least one ignore pattern.
        """
        # Lower-case both sides and use fnmatchcase so the result does not
        # depend on the platform-specific normalisation done by fnmatch().
        lowered = name.lower()
        return any(
            fnmatch.fnmatchcase(lowered, pattern.lower())
            for pattern in self.IGNORE_PATTERNS
        )

    def compute_hash(self, file_path: str) -> str:
        """
        Computes the SHA-256 hash of a file.

        The file is read in fixed-size chunks so that large files are never
        loaded into memory at once.

        Args:
            file_path: Path of the file to hash.

        Returns:
            The hexadecimal SHA-256 digest of the file content.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        hash_sha256 = hashlib.sha256()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b""):
                hash_sha256.update(chunk)
        return hash_sha256.hexdigest()

    def get_relative_files(self, root_path: str) -> Dict[str, str]:
        """
        Walks a directory tree and maps each file's relative path to its full path.

        Ignored directories are not descended into and ignored files are
        left out of the result.

        Args:
            root_path: Directory to walk.

        Returns:
            A dict whose keys are paths relative to ``root_path`` and whose
            values are the corresponding paths joined onto ``root_path``.
        """
        file_map: Dict[str, str] = {}
        for root, dirs, files in os.walk(root_path):
            # Modify dirs in-place to prevent walking into ignored directories
            dirs[:] = [d for d in dirs if not self._is_ignored(d)]
            for file in files:
                if self._is_ignored(file):
                    continue
                full_path = os.path.join(root, file)
                relative_path = os.path.relpath(full_path, root_path)
                file_map[relative_path] = full_path
        return file_map

    def _files_differ(self, first_path: str, second_path: str) -> bool:
        """Returns True when the two files do not have identical content."""
        # Different sizes imply different content; skip the costly hashing.
        if os.path.getsize(first_path) != os.path.getsize(second_path):
            return True
        return self.compute_hash(first_path) != self.compute_hash(second_path)

    def compare(self) -> Tuple[List[str], List[str], List[str]]:
        """
        Compares source and destination folders to find changes.

        Returns:
            A tuple ``(added, modified, deleted)`` of sorted relative paths:
            ``added`` exists only in the source tree, ``deleted`` exists only
            in the destination tree, and ``modified`` exists in both trees
            with different content.

        Raises:
            OSError: If a file present in both trees cannot be read.
        """
        source_files = self.get_relative_files(self.source_path)
        dest_files = self.get_relative_files(self.destination_path)
        source_keys: Set[str] = set(source_files)
        dest_keys: Set[str] = set(dest_files)

        # Fresh lists are bound on every call (instead of clearing the old
        # ones in place) so results returned by an earlier call stay intact.
        self.added = sorted(source_keys - dest_keys)
        self.deleted = sorted(dest_keys - source_keys)
        self.modified = sorted(
            relative_path
            for relative_path in source_keys & dest_keys
            if self._files_differ(
                source_files[relative_path], dest_files[relative_path]
            )
        )
        return self.added, self.modified, self.deleted