# profileAnalyzer/core/core.py
"""
Core logic for loading and analyzing cProfile .prof files.
"""
import pstats
from io import StringIO


class ProfileAnalyzer:
    """
    Handles loading a profile data file and extracting statistics.

    Attributes:
        stats: The loaded ``pstats.Stats`` object, or None when nothing is
            loaded (initial state, or after a failed ``load_profile``).
        profile_path: The path the current stats were loaded from, or None.
    """

    def __init__(self):
        self.stats = None
        self.profile_path = None

    def load_profile(self, filepath: str) -> bool:
        """
        Loads a .prof file into a pstats.Stats object.

        On failure the error is printed and any previously loaded profile is
        discarded, so the analyzer never keeps stale data around.

        Args:
            filepath (str): The path to the .prof file.

        Returns:
            bool: True if loading was successful, False otherwise.
        """
        try:
            # Build into a local first so instance state is only touched once
            # the whole load (including strip_dirs) has succeeded.
            loaded = pstats.Stats(filepath)
            loaded.strip_dirs()  # Clean up filenames for readability
        except (OSError, EOFError, TypeError, ValueError, AttributeError) as e:
            # OSError: missing or unreadable file (covers FileNotFoundError).
            # EOFError / ValueError / TypeError: the file is empty, truncated
            # or not marshal data at all.
            # AttributeError / TypeError / ValueError: the file unmarshals,
            # but not into the mapping layout pstats expects.
            print(f"Error loading profile file '{filepath}': {e}")
            self.stats = None
            self.profile_path = None
            return False

        self.stats = loaded
        self.profile_path = filepath
        return True

    def get_stats(self, sort_by: str, limit: int = 50) -> list:
        """
        Gets a list of formatted statistics sorted by a given key.

        The report is produced from the already-loaded Stats object, so the
        profile file is not read again and may have been moved or deleted
        since ``load_profile`` was called.

        Args:
            sort_by (str): The key to sort statistics by. Valid keys include:
                'calls', 'ncalls', 'tottime', 'cumulative', 'cumtime'.
                Whatever pstats raises for an unrecognized key propagates to
                the caller.
            limit (int): The maximum number of rows to return.

        Returns:
            list: A list of tuples, where each tuple represents a function's
                profile data: (ncalls, tottime, percall_tottime, cumtime,
                percall_cumtime, filename:lineno(function)). The numeric
                values carry the precision of the printed pstats report.
                Returns an empty list if no stats are loaded.
        """
        if self.stats is None:
            return []

        # pstats only exposes this table through print_stats, so point the
        # loaded object's output stream at a buffer for the duration of the
        # call and restore it afterwards, even if sorting or printing raises.
        # Note that sort_stats leaves self.stats ordered by ``sort_by``.
        buffer = StringIO()
        previous_stream = self.stats.stream
        self.stats.stream = buffer
        try:
            self.stats.sort_stats(sort_by)
            # print_stats(limit) prints the top 'limit' entries
            self.stats.print_stats(limit)
        finally:
            self.stats.stream = previous_stream

        return self._parse_stats_table(buffer.getvalue())

    @staticmethod
    def _parse_stats_table(report: str) -> list:
        """Parses the text of a pstats report into a list of row tuples."""
        rows = []
        in_table = False
        for line in report.splitlines():
            if not line.strip():
                continue
            if not in_table:
                # Everything up to and including the column header line is
                # preamble (file name, call totals, sort order).
                in_table = 'ncalls' in line and 'tottime' in line
                continue
            # Columns are whitespace separated, but the trailing function
            # description may itself contain spaces, so split at most 5 times.
            parts = line.split(maxsplit=5)
            if len(parts) != 6:
                continue
            ncalls, tottime, tot_percall, cumtime, cum_percall, func_info = parts
            try:
                # ncalls can be "x/y" for recursive calls, so keep it a string.
                rows.append((
                    ncalls,
                    float(tottime),
                    float(tot_percall),
                    float(cumtime),
                    float(cum_percall),
                    func_info,
                ))
            except ValueError:
                # Skip lines that don't parse correctly (e.g. rows where
                # pstats left a per-call column blank).
                continue
        return rows