"""Test baseline creation performance with directory copy vs zip.""" import os import shutil import tempfile import time from pathlib import Path # Create a test project with multiple files temp_project = tempfile.mkdtemp(prefix="test_perf_project_") try: # Create a realistic project structure with multiple files print("Creating test project...") # Create multiple Python files for i in range(20): file_path = Path(temp_project) / f"module_{i}.py" content = f'''"""Module {i}.""" def function_{i}_1(): """Function 1.""" return {i} def function_{i}_2(): """Function 2.""" x = {i * 10} return x * 2 class Class_{i}: """Test class.""" def method_1(self): """Method 1.""" pass def method_2(self): """Method 2.""" return "test_{i}" # Some comments # More comments # Even more comments if __name__ == "__main__": print("Module {i}") ''' file_path.write_text(content, encoding='utf-8') # Create subdirectories with files for subdir_idx in range(3): subdir = Path(temp_project) / f"subdir_{subdir_idx}" subdir.mkdir() for i in range(5): file_path = subdir / f"submodule_{i}.py" content = f'''"""Submodule {subdir_idx}/{i}.""" def sub_function(): return {subdir_idx * 100 + i} ''' file_path.write_text(content, encoding='utf-8') print(f"Test project created: {temp_project}") print(f"Total files: {len(list(Path(temp_project).rglob('*.py')))}") print() # Import modules from pyucc.core.differ import BaselineManager # Create baseline manager baseline_dir = tempfile.mkdtemp(prefix="test_baselines_") bm = BaselineManager(temp_project, baselines_root=baseline_dir) # Test baseline creation (now using directory copy) print("Creating baseline with directory copy...") start_time = time.time() baseline_id = bm.create_baseline_from_dir( temp_project, baseline_id="test_baseline_dir", snapshot=True, compute_sha1=True ) end_time = time.time() creation_time = end_time - start_time print(f"Baseline created: {baseline_id}") print(f"⏱️ Time: {creation_time:.3f} seconds") # Check what was created baseline_path = bm._baseline_dir(baseline_id) files_dir = os.path.join(baseline_path, "files") files_zip = os.path.join(baseline_path, "files.zip") print() print("Created artifacts:") print(f" Baseline dir: {os.path.exists(baseline_path)}") print(f" files/ directory: {os.path.exists(files_dir)}") print(f" files.zip: {os.path.exists(files_zip)}") if os.path.exists(files_dir): file_count = len(list(Path(files_dir).rglob('*.py'))) print(f" Files in snapshot: {file_count}") # Calculate size total_size = sum(f.stat().st_size for f in Path(files_dir).rglob('*') if f.is_file()) print(f" Snapshot size: {total_size / 1024:.2f} KB") print() # Verify baseline can be loaded print("Loading and verifying baseline...") meta = bm.load_metadata(baseline_id) print(f" Files in metadata: {len(meta.files)}") print(f" Profile: {meta.profile}") print(f" Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(meta.created_at))}") # Check countings were computed countings_ok = sum(1 for fm in meta.files if fm.countings is not None) print(f" Files with countings: {countings_ok}/{len(meta.files)}") print() if os.path.exists(files_dir) and not os.path.exists(files_zip): print("✅ SUCCESS: Baseline uses directory copy (files/) instead of zip!") print(f" Creation time: {creation_time:.3f}s") print(" Benefits:") print(" - Faster creation (no compression)") print(" - Immediate access to files (no extraction needed)") print(" - Easier to inspect baseline contents") elif os.path.exists(files_zip): print("⚠️ WARNING: files.zip still exists (should have been removed)") else: print("❌ FAIL: No snapshot found") finally: # Cleanup shutil.rmtree(temp_project, ignore_errors=True) if 'baseline_dir' in locals(): shutil.rmtree(baseline_dir, ignore_errors=True)