# SXXXXXXX_DependencyAnalyzer/dependencyanalyzer/core/stdlib_detector.py
# 2025-11-10 14:18:35 +01:00
#
# 257 lines
# 11 KiB
# Python

# dependency_analyzer/core/stdlib_detector.py
"""
Handles the detection of Python's standard library modules.
This module provides functions to determine if a module name or file path
belongs to the Python standard library, accommodating different Python versions.
"""
import importlib.util
import logging
import os
import sys
import sysconfig
from pathlib import Path
from typing import Optional, Set
# --- Logger Configuration ---
logger = logging.getLogger(__name__)

# --- Standard Library Detection Logic ---
# STANDARD_LIBRARY_MODULES holds the top-level names of standard library
# modules. sys.stdlib_module_names (introduced in Python 3.10) is preferred;
# when it is unavailable the hardcoded list further below is used instead.
if sys.version_info >= (3, 10):
    try:
        # The most reliable method for modern Python versions
        STANDARD_LIBRARY_MODULES: frozenset[str] = sys.stdlib_module_names
        logger.debug(
            "Using sys.stdlib_module_names for standard library module list (Python 3.10+)."
        )
    except AttributeError:
        # Fallback in case the attribute is missing for some reason
        logger.warning(
            "sys.stdlib_module_names not found despite Python 3.10+. Using a predefined list."
        )
        STANDARD_LIBRARY_MODULES = frozenset()  # Will be populated by the predefined list
else:
    # For Python versions older than 3.10
    logger.debug(
        "Using a predefined list for standard library modules (Python < 3.10)."
    )
    STANDARD_LIBRARY_MODULES = frozenset()  # Will be populated below

# If the modern method is unavailable or failed, use a hardcoded list.
# This list is extensive but might not be perfectly up-to-date.
if not STANDARD_LIBRARY_MODULES:
    _PREDEFINED_STDLIBS = {
        "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
        "asyncore", "atexit", "audioop", "base64", "bdb", "binascii",
        "binhex", "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb",
        "chunk", "cmath", "cmd", "code", "codecs", "codeop", "collections",
        "colorsys", "compileall", "concurrent", "configparser", "contextlib",
        "contextvars", "copy", "copyreg", "cProfile", "crypt", "csv",
        "ctypes", "curses", "dataclasses", "datetime", "dbm", "decimal",
        "difflib", "dis", "distutils", "doctest", "email", "encodings",
        "ensurepip", "enum", "errno", "faulthandler", "fcntl", "filecmp",
        "fileinput", "fnmatch", "formatter", "fractions", "ftplib",
        "functools", "gc", "getopt", "getpass", "gettext", "glob",
        "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html",
        "http", "idlelib", "imaplib", "imghdr", "imp", "importlib",
        "inspect", "io", "ipaddress", "itertools", "json", "keyword",
        "lib2to3", "linecache", "locale", "logging", "lzma", "mailbox",
        "mailcap", "marshal", "math", "mimetypes", "mmap", "modulefinder",
        "multiprocessing", "netrc", "nis", "nntplib", "numbers", "operator",
        "optparse", "os", "ossaudiodev", "parser", "pathlib", "pdb",
        "pickle", "pickletools", "pipes", "pkgutil", "platform", "plistlib",
        "poplib", "posix", "pprint", "profile", "pstats", "pty", "pwd",
        "py_compile", "pyclbr", "pydoc", "pydoc_data", "pyexpat", "queue",
        "quopri", "random", "re", "readline", "reprlib", "resource",
        "rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
        "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
        "sndhdr", "socket", "socketserver", "spwd", "sqlite3", "ssl",
        "stat", "statistics", "string", "stringprep", "struct",
        "subprocess", "sunau", "symtable", "sys", "sysconfig", "syslog",
        "tabnanny", "tarfile", "telnetlib", "tempfile", "termios",
        "textwrap", "threading", "time", "timeit", "tkinter", "token",
        "tokenize", "trace", "traceback", "tracemalloc", "tty", "turtle",
        "turtledemo", "types", "typing", "unicodedata", "unittest",
        "urllib", "uu", "uuid", "venv", "warnings", "wave", "weakref",
        "webbrowser", "wsgiref", "xdrlib", "xml", "xmlrpc", "zipapp",
        "zipfile", "zipimport", "zlib", "_thread", "_collections_abc",
        "_json", "_datetime", "_weakrefset", "_strptime", "_socket", "_ssl",
        "_struct", "_queue", "_pickle", "_lsprof", "_heapq", "_hashlib",
        "_csv", "_bz2", "_codecs", "_bisect", "_blake2", "_asyncio", "_ast",
        "_abc",
        # Standard library modules that the list above lacked. The
        # platform-specific ones (winreg, msvcrt, ...) matter most: they cannot
        # be located through importlib on other platforms, so the name list is
        # the only way to recognise them there.
        "__future__", "antigravity", "dummy_threading", "genericpath",
        "macpath", "msilib", "msvcrt", "nt", "ntpath", "nturl2path",
        "opcode", "posixpath", "sre_compile", "sre_constants", "sre_parse",
        "symbol", "this", "winreg", "winsound", "zoneinfo",
    }
    STANDARD_LIBRARY_MODULES = frozenset(_PREDEFINED_STDLIBS)
    logger.debug("Populated standard library list from predefined set.")


# Result of the first get_standard_library_paths() call; None until computed.
_CACHED_STD_LIB_PATHS: Optional[Set[str]] = None


def _add_stdlib_dir(paths: Set[str], candidate: Optional[str], label: str) -> None:
    """Add the normalized *candidate* to *paths* if it is an existing directory."""
    if candidate and os.path.isdir(candidate):
        normalized = os.path.normpath(candidate)
        paths.add(normalized)
        logger.debug(f"Found stdlib path ({label}): {normalized}")


def get_standard_library_paths() -> Set[str]:
    """
    Retrieves and caches normalized paths for the Python standard library.
    This is a fallback/supplement to the module name list.

    Candidate directories come from sysconfig ("stdlib" and "platstdlib"),
    from the base installation prefix, from the platform-specific extension
    module directory ("DLLs" on Windows, "lib-dynload" elsewhere) and from the
    PYTHONFRAMEWORKPREFIX config variable. Only existing directories are kept.
    If none is found, the directories containing the ``os`` and ``sysconfig``
    modules are used instead.

    Returns:
        The set of normalized directory paths. The set is computed on the
        first call and the same cached object is returned afterwards; it is
        empty when no path could be determined.
    """
    global _CACHED_STD_LIB_PATHS
    if _CACHED_STD_LIB_PATHS is not None:
        return _CACHED_STD_LIB_PATHS

    paths: Set[str] = set()
    logger.debug("Determining standard library paths for the first time...")
    try:
        # Common paths from sysconfig
        for path_name in ("stdlib", "platstdlib"):
            try:
                _add_stdlib_dir(paths, sysconfig.get_path(path_name), path_name)
            except Exception as e:
                logger.warning(
                    f"Could not get sysconfig path '{path_name}': {e}"
                )

        # Inside a virtual environment sys.prefix / sys.exec_prefix point at
        # the environment, whose lib directory only hosts site-packages. The
        # base_* attributes always refer to the real installation.
        base_prefix = getattr(sys, "base_prefix", sys.prefix)
        base_exec_prefix = getattr(sys, "base_exec_prefix", sys.exec_prefix)
        version_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"

        # Path relative to the Python installation
        _add_stdlib_dir(
            paths, os.path.join(base_prefix, "lib", version_dir), "prefix"
        )

        # Platform-specific paths
        if sys.platform == "win32":
            _add_stdlib_dir(paths, os.path.join(base_prefix, "DLLs"), "DLLs")
        else:
            _add_stdlib_dir(
                paths,
                os.path.join(base_exec_prefix, "lib", version_dir, "lib-dynload"),
                "dynload",
            )

        # Framework paths (macOS)
        fw_prefix = sysconfig.get_config_var("PYTHONFRAMEWORKPREFIX")
        if fw_prefix and isinstance(fw_prefix, str) and os.path.isdir(fw_prefix):
            _add_stdlib_dir(
                paths, os.path.join(fw_prefix, "lib", version_dir), "Framework"
            )

        # Fallback if sysconfig fails, using well-known module locations
        if not paths:
            logger.warning("Sysconfig paths failed, attempting fallback using module locations.")
            for module, module_label in ((os, "os"), (sysconfig, "sysconfig")):
                try:
                    paths.add(os.path.normpath(os.path.dirname(module.__file__)))
                except (AttributeError, TypeError):
                    logger.error(f"Could not determine path for '{module_label}' module.")
    except Exception as e:
        # Best effort: whatever was collected before the failure is still used.
        logger.exception(f"Unexpected error while determining standard library paths: {e}")

    _CACHED_STD_LIB_PATHS = {p for p in paths if p}  # Filter out any empty paths
    if not _CACHED_STD_LIB_PATHS:
        logger.error("Failed to determine ANY standard library paths. Path-based checks will fail.")
    else:
        logger.debug(f"Final cached standard library paths: {_CACHED_STD_LIB_PATHS}")
    return _CACHED_STD_LIB_PATHS


def is_path_in_standard_library(file_path_str: Optional[str]) -> bool:
    """
    Checks if a file path string is within any known standard library directory.
    Excludes 'site-packages' and 'dist-packages' explicitly.

    Both the file path and the standard library directories are passed through
    os.path.normcase before comparison, so differences in letter case or slash
    direction do not cause false negatives on Windows (normcase leaves POSIX
    paths unchanged).

    Args:
        file_path_str: Path to test. None or an empty string yields False.

    Returns:
        True if the absolute form of the path lies below one of the directories
        returned by get_standard_library_paths(); False otherwise, including
        when the path cannot be processed.
    """
    if not file_path_str:
        return False
    std_lib_paths = get_standard_library_paths()
    if not std_lib_paths:
        return False
    try:
        norm_file_path = os.path.normcase(
            os.path.normpath(os.path.abspath(file_path_str))
        )
        # Quick check to exclude third-party packages
        path_parts = Path(norm_file_path).parts
        if "site-packages" in path_parts or "dist-packages" in path_parts:
            return False
        # Appending the separator makes sure only whole directory names match
        # (".../python3.1" must not match ".../python3.10/os.py").
        return any(
            norm_file_path.startswith(os.path.normcase(std_path) + os.sep)
            for std_path in std_lib_paths
        )
    except (OSError, TypeError, ValueError) as e:
        logger.warning(f"Error during path comparison for '{file_path_str}': {e}")
        return False


def is_standard_library(module_name: str) -> bool:
    """
    Checks if a module name belongs to the standard library using multiple strategies.

    The decision is made on the top-level component of the name ("xml" for
    "xml.etree.ElementTree"): a submodule belongs to the standard library
    exactly when its top-level package does. Resolving only the top-level
    name also means importlib never has to import a parent package, so no
    third-party code is executed during the check.

    1. Checks against a predefined/system-provided list of names.
    2. Finds the module's specification (`find_spec`) to check its origin.
    3. If the origin is a file path, checks if it's in a standard library directory.

    Args:
        module_name: Absolute module name, optionally dotted.

    Returns:
        True if the module is classified as standard library, False otherwise
        (including for empty, non-string or unresolvable names).
    """
    if not isinstance(module_name, str) or not module_name:
        return False

    top_level = module_name.partition(".")[0]

    # Strategy 1: Check against the reliable name list
    if top_level in STANDARD_LIBRARY_MODULES:
        logger.debug(f"'{module_name}' is in the standard library name set.")
        return True

    # Strategy 2: Use importlib to find the module's specification
    try:
        spec = importlib.util.find_spec(top_level)
    except Exception as e:
        # find_spec raises ValueError for empty names (e.g. the top-level part
        # of a relative name such as ".foo") or when a loaded module has no
        # __spec__; custom finders may raise anything else. Any failure means
        # the module cannot be classified as standard library.
        logger.debug(
            f"Could not find spec for '{module_name}' (error: {e}). Assuming it's non-standard."
        )
        return False
    if spec is None:
        logger.debug(f"No spec found for '{module_name}'. Assuming non-standard.")
        return False

    origin = spec.origin
    logger.debug(f"Module '{module_name}' has origin: '{origin}'")
    # 'built-in' or 'frozen' modules are part of the standard library
    if origin in ("built-in", "frozen"):
        logger.debug(f"'{module_name}' is a '{origin}' module.")
        return True

    # Strategy 3: Check if the module's file path is in a stdlib directory
    if is_path_in_standard_library(origin):
        logger.debug(f"Path for '{module_name}' ('{origin}') is a standard library path.")
        return True

    logger.debug(f"'{module_name}' is not classified as standard library.")
    return False