import hashlib
import os
from typing import Dict, List, Set, Tuple


class CodeComparer:
    """Compare two directory trees and report added, modified and deleted files.

    Files are matched by their path relative to each tree root. Entries whose
    name matches ``IGNORE_PATTERNS`` or ends with one of the runtime
    ``ignore_extensions`` are skipped; ignored directories are not descended
    into. Files present in both trees are compared by SHA-256 digest.
    """

    # Names (and simple "*suffix" patterns) skipped during comparison.
    # Matching is case-insensitive, so "thumbs.db" also covers "Thumbs.db".
    IGNORE_PATTERNS = {
        ".svn", ".git", "__pycache__", ".pytest_cache",
        ".vscode", ".idea", "*.pyc", ".DS_Store", "thumbs.db",
    }

    # Number of bytes read per iteration while hashing a file.
    _HASH_CHUNK_SIZE = 65536

    def __init__(self, source_path: str, destination_path: str, ignore_extensions=None):
        """Store the two tree roots and normalize the extension ignore list.

        Args:
            source_path: Root of the source tree.
            destination_path: Root of the destination tree.
            ignore_extensions: Iterable of extensions to ignore, with or
                without the leading dot (e.g. ``['.o', 'd']``). A single
                string is treated as one extension. Entries are stripped,
                lowercased and given a leading dot; blank entries are dropped.
        """
        self.source_path = source_path
        self.destination_path = destination_path

        # A bare string would otherwise be iterated character by character,
        # yielding bogus one-letter extensions (and "." which matches any
        # name ending in a dot).
        if isinstance(ignore_extensions, str):
            ignore_extensions = [ignore_extensions]

        normalized = set()
        for raw in (ignore_extensions or []):
            try:
                ext = str(raw).strip()
            except Exception:
                # Best effort: skip entries that cannot be converted to text.
                continue
            if not ext:
                continue
            if not ext.startswith('.'):
                ext = '.' + ext
            normalized.add(ext.lower())
        self.ignore_extensions = normalized

        # Results of the most recent compare() call (relative paths, sorted).
        self.added: List[str] = []
        self.modified: List[str] = []
        self.deleted: List[str] = []

    def _is_ignored(self, name: str) -> bool:
        """Return True if a file or directory name should be skipped.

        A name is ignored when, compared case-insensitively, it equals an
        entry of ``IGNORE_PATTERNS``, ends with the suffix of a ``*``-prefixed
        pattern (e.g. ``*.pyc``), or ends with one of ``ignore_extensions``.
        """
        lowered = name.lower()

        for pattern in self.IGNORE_PATTERNS:
            lowered_pattern = pattern.lower()
            # Exact-name match; case-insensitive so that "Thumbs.db" matches
            # the lowercase "thumbs.db" pattern.
            if lowered == lowered_pattern:
                return True
            # Simple wildcard support: "*<suffix>" matches any name ending
            # with <suffix>.
            if lowered_pattern.startswith("*") and lowered.endswith(
                lowered_pattern.lstrip("*")
            ):
                return True

        # Extensions supplied at construction time (already lowercased).
        return any(ext and lowered.endswith(ext) for ext in self.ignore_extensions)

    def compute_hash(self, file_path: str) -> str:
        """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

        The file is read in fixed-size chunks so large files are not loaded
        into memory at once. Errors from ``open``/``read`` propagate.
        """
        digest = hashlib.sha256()
        with open(file_path, "rb") as handle:
            for chunk in iter(lambda: handle.read(self._HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def get_relative_files(self, root_path: str) -> Dict[str, str]:
        """Map each non-ignored file under ``root_path`` to its full path.

        Returns:
            A dict whose keys are paths relative to ``root_path`` and whose
            values are the corresponding joined paths as produced by the walk.
            Ignored directories are pruned and ignored files are omitted.
        """
        file_map: Dict[str, str] = {}
        for current_dir, dirs, files in os.walk(root_path):
            # Prune in place so os.walk does not descend into ignored dirs.
            dirs[:] = [d for d in dirs if not self._is_ignored(d)]

            for file_name in files:
                if self._is_ignored(file_name):
                    continue
                full_path = os.path.join(current_dir, file_name)
                file_map[os.path.relpath(full_path, root_path)] = full_path
        return file_map

    def compare(self) -> Tuple[List[str], List[str], List[str]]:
        """Compare the source and destination trees.

        Returns:
            A tuple ``(added, modified, deleted)`` of sorted relative paths:
            files only in the source, files in both trees whose SHA-256
            digests differ, and files only in the destination. The same lists
            are stored on ``self.added``, ``self.modified`` and
            ``self.deleted``.
        """
        source_files = self.get_relative_files(self.source_path)
        dest_files = self.get_relative_files(self.destination_path)

        source_keys: Set[str] = set(source_files)
        dest_keys: Set[str] = set(dest_files)

        added = sorted(source_keys - dest_keys)
        deleted = sorted(dest_keys - source_keys)
        modified = sorted(
            relative_path
            for relative_path in source_keys & dest_keys
            if self.compute_hash(source_files[relative_path])
            != self.compute_hash(dest_files[relative_path])
        )

        # Bind fresh lists instead of clearing the old ones in place, so
        # results returned by an earlier compare() call are not mutated.
        self.added = added
        self.modified = modified
        self.deleted = deleted
        return self.added, self.modified, self.deleted