# SXXXXXXX_ProfileAnalyzer/profileanalyzer/core/core.py

# profileAnalyzer/core/core.py

"""
Core logic for loading and analyzing cProfile .prof files.
"""
import pstats
from io import StringIO

class ProfileAnalyzer:
    """
    Loads a cProfile ``.prof`` dump and exposes its statistics as plain tuples.

    Typical use: call ``load_profile(path)`` once, then ``get_stats(...)`` as
    often as needed with different sort keys. Rows are built from the
    statistics held in memory, so the file is only read by ``load_profile``.
    """

    # Number of decimals kept for time values; this is the precision pstats
    # uses in its printed report, which earlier versions of get_stats parsed.
    _TIME_DECIMALS = 3

    def __init__(self):
        # pstats.Stats for the currently loaded profile, or None if none is loaded.
        self.stats = None
        # Path the current profile was loaded from, or None if none is loaded.
        self.profile_path = None

    def load_profile(self, filepath: str) -> bool:
        """
        Loads a .prof file into a pstats.Stats object.

        On failure any previously loaded profile is discarded, an error
        message is printed, and False is returned.

        Args:
            filepath (str): The path to the .prof file.

        Returns:
            bool: True if loading was successful, False otherwise.
        """
        try:
            loaded = pstats.Stats(filepath)
            loaded.strip_dirs()  # Clean up filenames for readability
        except (OSError, TypeError, ValueError, EOFError, AttributeError) as e:
            # OSError: missing/unreadable file. The others are what
            # marshal/pstats raise for content that is not a stats dump
            # (empty file, unknown type code, marshalled non-dict, ...).
            print(f"Error loading profile file '{filepath}': {e}")
            self.stats = None
            self.profile_path = None
            return False

        self.stats = loaded
        self.profile_path = filepath
        return True

    def get_stats(self, sort_by: str, limit: int = 50) -> list:
        """
        Gets a list of statistics rows sorted by a given key.

        Args:
            sort_by (str): The key to sort statistics by. Valid keys include:
                           'calls', 'ncalls', 'tottime', 'cumulative', 'cumtime'.
            limit (int): The maximum number of rows to return. A negative
                         value (or None) returns every row.

        Returns:
            list: A list of tuples, where each tuple represents a function's
                  profile data: (ncalls, tottime, percall_tottime, cumtime,
                  percall_cumtime, filename:lineno(function)).
                  ``ncalls`` is a string ("total" or "total/primitive" for
                  recursive functions); the time values are floats rounded
                  to three decimals. A per-call value is 0.0 when its call
                  count is zero.
                  Returns an empty list if no stats are loaded.

        Raises:
            KeyError: If ``sort_by`` is not a sort key known to pstats.
        """
        if self.stats is None:
            return []

        # sort_stats() records the ordered function keys on the Stats object.
        self.stats.sort_stats(sort_by)
        ordered = self.stats.fcn_list or []
        if limit is not None and 0 <= limit < len(ordered):
            ordered = ordered[:limit]

        decimals = self._TIME_DECIMALS
        raw = self.stats.stats
        results = []
        for func in ordered:
            # pstats stores (primitive calls, total calls, tottime, cumtime, callers).
            prim_calls, total_calls, tottime, cumtime, _callers = raw[func]

            # Recursive functions are reported as "total/primitive".
            if total_calls == prim_calls:
                ncalls = str(total_calls)
            else:
                ncalls = f"{total_calls}/{prim_calls}"

            # Same divisors pstats uses in its report: tottime per total
            # call, cumtime per primitive call.
            percall_tottime = round(tottime / total_calls, decimals) if total_calls else 0.0
            percall_cumtime = round(cumtime / prim_calls, decimals) if prim_calls else 0.0

            results.append((
                ncalls,
                round(tottime, decimals),
                percall_tottime,
                round(cumtime, decimals),
                percall_cumtime,
                pstats.func_std_string(func),
            ))

        return results