114 lines
4.0 KiB
Python
114 lines
4.0 KiB
Python
import os
|
|
import hashlib
|
|
from typing import Dict, List, Set, Tuple
|
|
|
|
|
|
class CodeComparer:
    """Compare two directory trees and report added, modified and deleted files.

    "Added" files exist only under ``source_path``, "deleted" files exist only
    under ``destination_path``, and "modified" files exist in both trees but
    have different SHA-256 digests. Files and directories whose names match
    ``IGNORE_PATTERNS`` or one of the runtime ``ignore_extensions`` are
    skipped entirely.
    """

    # Names to skip during comparison. Entries starting with "*" are treated
    # as suffix patterns ("*.pyc" matches any name ending in ".pyc"); all
    # other entries must equal the whole file/directory name. Matching is
    # case-insensitive so that e.g. Windows' "Thumbs.db" is caught by
    # "thumbs.db".
    IGNORE_PATTERNS: Set[str] = {
        ".svn",
        ".git",
        "__pycache__",
        ".pytest_cache",
        ".vscode",
        ".idea",
        "*.pyc",
        ".DS_Store",
        "thumbs.db",
    }

    # Number of bytes read per iteration while hashing a file.
    _HASH_CHUNK_SIZE = 65536

    def __init__(self, source_path: str, destination_path: str, ignore_extensions=None):
        """Store the two tree roots and normalize the extension ignore list.

        Args:
            source_path: Root directory of the "new" tree.
            destination_path: Root directory of the "old" tree.
            ignore_extensions: Optional iterable of extensions to skip, with
                or without the leading dot (``['.o', 'd']``). A single string
                is accepted and treated as one extension. Entries are
                stripped, given a leading dot and lower-cased.
        """
        self.source_path = source_path
        self.destination_path = destination_path
        self.ignore_extensions: Set[str] = self._normalize_extensions(ignore_extensions)
        self.added: List[str] = []
        self.modified: List[str] = []
        self.deleted: List[str] = []

    @staticmethod
    def _normalize_extensions(extensions) -> Set[str]:
        """Return ``extensions`` as a set of lower-case, dot-prefixed strings."""
        if not extensions:
            return set()
        if isinstance(extensions, str):
            # A bare string would otherwise be iterated character by
            # character, turning ".o" into the two "extensions" "." and ".o".
            extensions = [extensions]

        normalized: Set[str] = set()
        for raw in extensions:
            if raw is None:
                continue
            try:
                text = str(raw).strip()
            except Exception:
                # Deliberately best-effort: an entry that cannot be rendered
                # as text is skipped rather than aborting construction.
                continue
            if not text:
                continue
            if not text.startswith("."):
                text = "." + text
            normalized.add(text.lower())
        return normalized

    def _is_ignored(self, name: str) -> bool:
        """Return True if a file or directory name should be skipped.

        ``name`` is a single path component, not a full path. All checks are
        case-insensitive. Runtime extensions are matched as plain name
        suffixes, so they apply to directory names as well.
        """
        lowered = name.lower()

        for pattern in self.IGNORE_PATTERNS:
            lowered_pattern = pattern.lower()
            if lowered_pattern.startswith("*"):
                # Simple wildcard support: "*.pyc" -> suffix ".pyc".
                if lowered.endswith(lowered_pattern.lstrip("*")):
                    return True
            elif lowered == lowered_pattern:
                return True

        # The "ext and" guard keeps an empty entry (possible if the public
        # attribute is reassigned) from matching every name.
        return any(ext and lowered.endswith(ext) for ext in self.ignore_extensions)

    def compute_hash(self, file_path: str) -> str:
        """Return the hexadecimal SHA-256 digest of the file at ``file_path``.

        The file is read in fixed-size chunks so large files are not loaded
        into memory at once. Errors from ``open``/``read`` propagate.
        """
        digest = hashlib.sha256()
        with open(file_path, "rb") as stream:
            for chunk in iter(lambda: stream.read(self._HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def get_relative_files(self, root_path: str) -> Dict[str, str]:
        """Map each non-ignored file under ``root_path`` to its full path.

        Keys are paths relative to ``root_path`` (using the OS separator);
        values are the corresponding paths as produced by joining the walked
        directory with the file name. Ignored directories are pruned, so
        nothing beneath them is visited.

        Note: ``os.walk`` is called without an ``onerror`` handler, so a
        missing or unreadable ``root_path`` yields an empty mapping rather
        than raising.
        """
        file_map: Dict[str, str] = {}
        for current_dir, dir_names, file_names in os.walk(root_path):
            # Assign in place: os.walk only honours pruning done on the very
            # list object it handed out.
            dir_names[:] = [d for d in dir_names if not self._is_ignored(d)]
            for file_name in file_names:
                if self._is_ignored(file_name):
                    continue
                full_path = os.path.join(current_dir, file_name)
                file_map[os.path.relpath(full_path, root_path)] = full_path
        return file_map

    def compare(self) -> Tuple[List[str], List[str], List[str]]:
        """Compare the source and destination trees.

        Returns:
            A tuple ``(added, modified, deleted)`` of sorted relative paths.
            The same lists are also stored on ``self.added``,
            ``self.modified`` and ``self.deleted``.

        Each call builds new list objects, so results returned by an earlier
        call are never mutated. The instance attributes are only replaced
        after every common file has been hashed successfully.
        """
        source_files = self.get_relative_files(self.source_path)
        dest_files = self.get_relative_files(self.destination_path)

        source_keys: Set[str] = set(source_files)
        dest_keys: Set[str] = set(dest_files)

        added = sorted(source_keys - dest_keys)
        deleted = sorted(dest_keys - source_keys)
        modified = sorted(
            relative_path
            for relative_path in source_keys & dest_keys
            if self.compute_hash(source_files[relative_path])
            != self.compute_hash(dest_files[relative_path])
        )

        self.added, self.modified, self.deleted = added, modified, deleted
        return self.added, self.modified, self.deleted