# dependency_analyzer/core/stdlib_detector.py
"""
Handles the detection of Python's standard library modules.

This module provides functions to determine if a module name or file path
belongs to the Python standard library, accommodating different Python
versions.
"""

import importlib.util
import logging
import os
import sys
import sysconfig
from pathlib import Path
from typing import FrozenSet, Optional, Set

# --- Logger Configuration ---
logger = logging.getLogger(__name__)

# --- Standard Library Detection Logic ---

# Top-level names of standard library modules. Starts empty and is filled
# either from the interpreter (Python 3.10+) or from the predefined list below.
STANDARD_LIBRARY_MODULES: FrozenSet[str] = frozenset()

# Attempt to get the frozenset of stdlib modules, introduced in Python 3.10
if sys.version_info >= (3, 10):
    try:
        # The most reliable method for modern Python versions
        STANDARD_LIBRARY_MODULES = sys.stdlib_module_names
        logger.debug(
            "Using sys.stdlib_module_names for standard library module list (Python 3.10+)."
        )
    except AttributeError:
        # Fallback in case the attribute is missing for some reason
        logger.warning(
            "sys.stdlib_module_names not found despite Python 3.10+. Using a predefined list."
        )
else:
    # For Python versions older than 3.10
    logger.debug(
        "Using a predefined list for standard library modules (Python < 3.10)."
    )

# If the modern method is unavailable or failed, use a hardcoded list.
# This list is extensive but might not be perfectly up-to-date.
if not STANDARD_LIBRARY_MODULES:
    _PREDEFINED_STDLIBS = {
        "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
        "asyncore", "atexit", "audioop", "base64", "bdb", "binascii", "binhex",
        "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb", "chunk",
        "cmath", "cmd", "code", "codecs", "codeop", "collections", "colorsys",
        "compileall", "concurrent", "configparser", "contextlib", "contextvars",
        "copy", "copyreg", "cProfile", "crypt", "csv", "ctypes", "curses",
        "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
        "distutils", "doctest", "email", "encodings", "ensurepip", "enum",
        "errno", "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch",
        "formatter", "fractions", "ftplib", "functools", "gc", "getopt",
        "getpass", "gettext", "glob", "graphlib", "grp", "gzip", "hashlib",
        "heapq", "hmac", "html", "http", "idlelib", "imaplib", "imghdr", "imp",
        "importlib", "inspect", "io", "ipaddress", "itertools", "json",
        "keyword", "lib2to3", "linecache", "locale", "logging", "lzma",
        "mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap",
        "modulefinder", "multiprocessing", "netrc", "nis", "nntplib", "numbers",
        "operator", "optparse", "os", "ossaudiodev", "parser", "pathlib", "pdb",
        "pickle", "pickletools", "pipes", "pkgutil", "platform", "plistlib",
        "poplib", "posix", "pprint", "profile", "pstats", "pty", "pwd",
        "py_compile", "pyclbr", "pydoc", "pydoc_data", "pyexpat", "queue",
        "quopri", "random", "re", "readline", "reprlib", "resource",
        "rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
        "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
        "sndhdr", "socket", "socketserver", "spwd", "sqlite3", "ssl", "stat",
        "statistics", "string", "stringprep", "struct", "subprocess", "sunau",
        "symtable", "sys", "sysconfig", "syslog", "tabnanny", "tarfile",
        "telnetlib", "tempfile", "termios", "textwrap", "threading", "time",
        "timeit", "tkinter", "token", "tokenize", "trace", "traceback",
        "tracemalloc", "tty", "turtle", "turtledemo", "types", "typing",
        "unicodedata", "unittest", "urllib", "uu", "uuid", "venv", "warnings",
        "wave", "weakref", "webbrowser", "wsgiref", "xdrlib", "xml", "xmlrpc",
        "zipapp", "zipfile", "zipimport", "zlib",
        # Private accelerator / support modules
        "_thread", "_collections_abc", "_json", "_datetime", "_weakrefset",
        "_strptime", "_socket", "_ssl", "_struct", "_queue", "_pickle",
        "_lsprof", "_heapq", "_hashlib", "_csv", "_bz2", "_codecs", "_bisect",
        "_blake2", "_asyncio", "_ast", "_abc",
        # Windows-only modules: find_spec() cannot locate these on other
        # platforms, so they must be listed to be recognized when analysing
        # cross-platform code from a non-Windows host.
        "_winapi", "msilib", "msvcrt", "nt", "winreg", "winsound",
    }
    STANDARD_LIBRARY_MODULES = frozenset(_PREDEFINED_STDLIBS)
    logger.debug("Populated standard library list from predefined set.")

# Directory names that mark third-party installs, even when nested inside a
# standard library directory (e.g. <prefix>/lib/pythonX.Y/site-packages).
_THIRD_PARTY_DIR_NAMES = frozenset({"site-packages", "dist-packages"})

# Lazily computed by get_standard_library_paths(); None means "not computed yet".
_CACHED_STD_LIB_PATHS: Optional[Set[str]] = None


def _add_existing_dir(paths: Set[str], candidate: Optional[str], label: str) -> None:
    """Add the normalized *candidate* to *paths* if it is an existing directory."""
    if candidate and os.path.isdir(candidate):
        paths.add(os.path.normpath(candidate))
        logger.debug("Found stdlib path (%s): %s", label, candidate)


def get_standard_library_paths() -> Set[str]:
    """
    Retrieves and caches normalized paths for the Python standard library.

    This is a fallback/supplement to the module name list. The result is
    computed once per process and the same set object is returned on every
    subsequent call. Discovery is best-effort: unexpected errors are logged
    and whatever was found so far is cached.

    Returns:
        A set of ``os.path.normpath``-normalized directory paths; empty if no
        standard library directory could be determined.
    """
    global _CACHED_STD_LIB_PATHS
    if _CACHED_STD_LIB_PATHS is not None:
        return _CACHED_STD_LIB_PATHS

    paths: Set[str] = set()
    version_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"
    logger.debug("Determining standard library paths for the first time...")

    try:
        # Common paths from sysconfig
        for path_name in ("stdlib", "platstdlib"):
            try:
                _add_existing_dir(paths, sysconfig.get_path(path_name), path_name)
            except Exception as e:
                logger.warning("Could not get sysconfig path '%s': %s", path_name, e)

        # Path relative to the Python executable
        _add_existing_dir(paths, os.path.join(sys.prefix, "lib", version_dir), "prefix")

        # Platform-specific locations of compiled extension modules
        if sys.platform == "win32":
            _add_existing_dir(paths, os.path.join(sys.prefix, "DLLs"), "DLLs")
        else:
            _add_existing_dir(
                paths,
                os.path.join(sys.exec_prefix, "lib", version_dir, "lib-dynload"),
                "dynload",
            )

        # Framework paths (macOS)
        fw_prefix = sysconfig.get_config_var("PYTHONFRAMEWORKPREFIX")
        if fw_prefix and isinstance(fw_prefix, str) and os.path.isdir(fw_prefix):
            _add_existing_dir(
                paths, os.path.join(fw_prefix, "lib", version_dir), "Framework"
            )

        # Fallback if sysconfig fails, using well-known module locations
        if not paths:
            logger.warning(
                "Sysconfig paths failed, attempting fallback using module locations."
            )
            try:
                paths.add(os.path.normpath(os.path.dirname(os.__file__)))
            except (AttributeError, TypeError):
                logger.error("Could not determine path for 'os' module.")
            try:
                paths.add(os.path.normpath(os.path.dirname(sysconfig.__file__)))
            except (AttributeError, TypeError):
                logger.error("Could not determine path for 'sysconfig' module.")
    except Exception as e:
        logger.exception(
            "Unexpected error while determining standard library paths: %s", e
        )

    _CACHED_STD_LIB_PATHS = {p for p in paths if p}  # Filter out any empty paths
    if not _CACHED_STD_LIB_PATHS:
        logger.error(
            "Failed to determine ANY standard library paths. Path-based checks will fail."
        )
    else:
        logger.debug("Final cached standard library paths: %s", _CACHED_STD_LIB_PATHS)
    return _CACHED_STD_LIB_PATHS


def is_path_in_standard_library(file_path_str: Optional[str]) -> bool:
    """
    Checks if a file path string is within any known standard library directory.

    Excludes 'site-packages' and 'dist-packages' explicitly. Paths are compared
    after ``os.path.normcase`` so that differently cased but equivalent paths
    match on case-insensitive platforms (Windows); on POSIX this is a no-op.

    Args:
        file_path_str: Path to test. ``None`` or an empty string yields False.

    Returns:
        True if the path lies strictly inside a standard library directory and
        not inside a third-party install directory; False otherwise, including
        when the path cannot be processed.
    """
    if not file_path_str:
        return False

    std_lib_paths = get_standard_library_paths()
    if not std_lib_paths:
        return False

    try:
        norm_file_path = os.path.normcase(
            os.path.normpath(os.path.abspath(file_path_str))
        )

        # Quick check to exclude third-party packages
        if not _THIRD_PARTY_DIR_NAMES.isdisjoint(Path(norm_file_path).parts):
            return False

        # Appending the separator prevents "/usr/lib/python3.1" from matching
        # files under "/usr/lib/python3.11".
        return any(
            norm_file_path.startswith(os.path.normcase(std_path) + os.sep)
            for std_path in std_lib_paths
        )
    except Exception as e:
        logger.warning("Error during path comparison for '%s': %s", file_path_str, e)
        return False


def is_standard_library(module_name: str) -> bool:
    """
    Checks if a module name belongs to the standard library using multiple strategies.

    Classification is based on the top-level package of ``module_name``, so
    dotted names such as ``"os.path"`` or ``"xml.etree.ElementTree"`` are
    classified like their top-level module.

    1. Checks the top-level name against a predefined/system-provided list.
    2. Finds the top-level module's specification (`find_spec`) to check its
       origin. Only the top-level name is resolved, because resolving a dotted
       name makes importlib import the parent packages, which would execute
       third-party code during analysis.
    3. If the origin is a file path, checks if it's in a standard library directory.

    Args:
        module_name: Absolute module name, optionally dotted.

    Returns:
        True if the module is classified as standard library. False otherwise,
        including for empty, non-string or relative (leading ".") names and for
        modules that cannot be located.
    """
    # Relative names refer to the importing project's own package.
    if (
        not isinstance(module_name, str)
        or not module_name
        or module_name.startswith(".")
    ):
        return False

    top_level = module_name.partition(".")[0]

    # Strategy 1: Check against the reliable name list
    if top_level in STANDARD_LIBRARY_MODULES:
        logger.debug("'%s' is in the standard library name set.", module_name)
        return True

    # Strategy 2: Use importlib to find the top-level module's specification
    try:
        spec = importlib.util.find_spec(top_level)
    except Exception as e:
        # find_spec can raise ValueError (e.g. a module already in sys.modules
        # with __spec__ set to None) and custom finders may raise anything;
        # treat every failure as "not standard library".
        logger.debug(
            "Could not find spec for '%s' (error: %s). Assuming it's non-standard.",
            module_name,
            e,
        )
        return False

    if spec is None:
        logger.debug("No spec found for '%s'. Assuming non-standard.", module_name)
        return False

    origin = spec.origin
    logger.debug("Module '%s' has origin: '%s'", module_name, origin)

    # 'built-in' or 'frozen' modules are part of the standard library
    if origin in ("built-in", "frozen"):
        logger.debug("'%s' is a '%s' module.", module_name, origin)
        return True

    # Strategy 3: Check if the module's file path is in a stdlib directory
    if is_path_in_standard_library(origin):
        logger.debug(
            "Path for '%s' ('%s') is a standard library path.", module_name, origin
        )
        return True

    logger.debug("'%s' is not classified as standard library.", module_name)
    return False