257 lines
11 KiB
Python
257 lines
11 KiB
Python
# dependency_analyzer/core/stdlib_detector.py
|
|
"""
|
|
Handles the detection of Python's standard library modules.
|
|
|
|
This module provides functions to determine if a module name or file path
|
|
belongs to the Python standard library, accommodating different Python versions.
|
|
"""
|
|
import importlib.util
|
|
import logging
|
|
import os
|
|
import sys
|
|
import sysconfig
|
|
from pathlib import Path
|
|
from typing import Optional, Set
|
|
|
|
# --- Logger Configuration ---
logger = logging.getLogger(__name__)

# --- Standard Library Detection Logic ---
# STANDARD_LIBRARY_MODULES holds the *top-level* names of standard library
# modules. On Python 3.10+ it comes straight from the interpreter; otherwise
# it is filled from the hard-coded list further down.
if sys.version_info >= (3, 10):
    try:
        # The most reliable method for modern Python versions
        STANDARD_LIBRARY_MODULES: frozenset[str] = sys.stdlib_module_names
        logger.debug(
            "Using sys.stdlib_module_names for standard library module list (Python 3.10+)."
        )
    except AttributeError:
        # Fallback in case the attribute is missing for some reason
        logger.warning(
            "sys.stdlib_module_names not found despite Python 3.10+. Using a predefined list."
        )
        STANDARD_LIBRARY_MODULES = frozenset()  # Will be populated by the predefined list
else:
    # For Python versions older than 3.10
    logger.debug(
        "Using a predefined list for standard library modules (Python < 3.10)."
    )
    STANDARD_LIBRARY_MODULES = frozenset()  # Will be populated below

# If the modern method is unavailable or failed, use a hardcoded list.
# This list is extensive but might not be perfectly up-to-date.
if not STANDARD_LIBRARY_MODULES:
    _PREDEFINED_STDLIBS = {
        "abc", "aifc", "argparse", "array", "ast", "asynchat", "asyncio",
        "asyncore", "atexit", "audioop", "base64", "bdb", "binascii",
        "binhex", "bisect", "builtins", "bz2", "calendar", "cgi", "cgitb",
        "chunk", "cmath", "cmd", "code", "codecs", "codeop", "collections",
        "colorsys", "compileall", "concurrent", "configparser", "contextlib",
        "contextvars", "copy", "copyreg", "cProfile", "crypt", "csv",
        "ctypes", "curses", "dataclasses", "datetime", "dbm", "decimal",
        "difflib", "dis", "distutils", "doctest", "email", "encodings",
        "ensurepip", "enum", "errno", "faulthandler", "fcntl", "filecmp",
        "fileinput", "fnmatch", "formatter", "fractions", "ftplib",
        "functools", "gc", "getopt", "getpass", "gettext", "glob",
        "graphlib", "grp", "gzip", "hashlib", "heapq", "hmac", "html",
        "http", "idlelib", "imaplib", "imghdr", "imp", "importlib",
        "inspect", "io", "ipaddress", "itertools", "json", "keyword",
        "lib2to3", "linecache", "locale", "logging", "lzma", "mailbox",
        "mailcap", "marshal", "math", "mimetypes", "mmap", "modulefinder",
        "multiprocessing", "netrc", "nis", "nntplib", "numbers", "operator",
        "optparse", "os", "ossaudiodev", "parser", "pathlib", "pdb",
        "pickle", "pickletools", "pipes", "pkgutil", "platform", "plistlib",
        "poplib", "posix", "pprint", "profile", "pstats", "pty", "pwd",
        "py_compile", "pyclbr", "pydoc", "pydoc_data", "pyexpat", "queue",
        "quopri", "random", "re", "readline", "reprlib", "resource",
        "rlcompleter", "runpy", "sched", "secrets", "select", "selectors",
        "shelve", "shlex", "shutil", "signal", "site", "smtpd", "smtplib",
        "sndhdr", "socket", "socketserver", "spwd", "sqlite3", "ssl",
        "stat", "statistics", "string", "stringprep", "struct",
        "subprocess", "sunau", "symtable", "sys", "sysconfig", "syslog",
        "tabnanny", "tarfile", "telnetlib", "tempfile", "termios",
        "textwrap", "threading", "time", "timeit", "tkinter", "token",
        "tokenize", "trace", "traceback", "tracemalloc", "tty", "turtle",
        "turtledemo", "types", "typing", "unicodedata", "unittest",
        "urllib", "uu", "uuid", "venv", "warnings", "wave", "weakref",
        "webbrowser", "wsgiref", "xdrlib", "xml", "xmlrpc", "zipapp",
        "zipfile", "zipimport", "zlib", "_thread", "_collections_abc",
        "_json", "_datetime", "_weakrefset", "_strptime", "_socket", "_ssl",
        "_struct", "_queue", "_pickle", "_lsprof", "_heapq", "_hashlib",
        "_csv", "_bz2", "_codecs", "_bisect", "_blake2", "_asyncio", "_ast",
        "_abc",
        # Modules that are easy to miss. The Windows-only ones matter most:
        # when the analyzer runs on another platform they cannot be located
        # on disk, so the name list is the only way to recognise them.
        "__future__", "_winapi", "genericpath", "msilib", "msvcrt", "nt",
        "ntpath", "posixpath", "symbol", "winreg", "winsound", "zoneinfo",
    }
    STANDARD_LIBRARY_MODULES = frozenset(_PREDEFINED_STDLIBS)
    logger.debug("Populated standard library list from predefined set.")
|
|
|
|
|
|
_CACHED_STD_LIB_PATHS: Optional[Set[str]] = None


def get_standard_library_paths() -> Set[str]:
    """
    Return the normalized directories that contain the standard library.

    The set is computed on the first call and stored in the module-level
    ``_CACHED_STD_LIB_PATHS``; every later call returns that same set object.
    Candidate directories come from ``sysconfig``, from ``sys.prefix`` /
    ``sys.exec_prefix`` and from the ``PYTHONFRAMEWORKPREFIX`` config variable,
    and only directories that exist are kept. When none of them yields a
    result, the directories holding the ``os`` and ``sysconfig`` modules are
    used instead.

    Returns:
        A set of ``os.path.normpath``-normalized directory strings. It is
        empty (and an error is logged) if nothing could be determined.
    """
    global _CACHED_STD_LIB_PATHS
    if _CACHED_STD_LIB_PATHS is not None:
        return _CACHED_STD_LIB_PATHS

    found: Set[str] = set()
    logger.debug("Determining standard library paths for the first time...")
    try:
        # Shared "pythonX.Y" directory component used by several candidates.
        version_dir = f"python{sys.version_info.major}.{sys.version_info.minor}"

        # Directories reported by sysconfig; one failing lookup must not
        # prevent the other from being tried.
        for path_name in ("stdlib", "platstdlib"):
            try:
                path_val = sysconfig.get_path(path_name)
                if not path_val or not os.path.isdir(path_val):
                    continue
                found.add(os.path.normpath(path_val))
                logger.debug(f"Found stdlib path ({path_name}): {path_val}")
            except Exception as e:
                logger.warning(
                    f"Could not get sysconfig path '{path_name}': {e}"
                )

        # <sys.prefix>/lib/pythonX.Y
        prefix_lib_path = os.path.normpath(
            os.path.join(sys.prefix, "lib", version_dir)
        )
        if os.path.isdir(prefix_lib_path):
            found.add(prefix_lib_path)
            logger.debug(f"Found stdlib path (prefix): {prefix_lib_path}")

        # Platform-specific directory: lib-dynload everywhere except Windows,
        # where the DLLs directory is checked instead.
        if sys.platform != "win32":
            dynload_path = os.path.join(
                sys.exec_prefix, "lib", version_dir, "lib-dynload"
            )
            if os.path.isdir(dynload_path):
                found.add(os.path.normpath(dynload_path))
                logger.debug(f"Found stdlib path (dynload): {dynload_path}")
        else:
            dlls_path = os.path.join(sys.prefix, "DLLs")
            if os.path.isdir(dlls_path):
                found.add(os.path.normpath(dlls_path))
                logger.debug(f"Found stdlib path (DLLs): {dlls_path}")

        # Framework builds (macOS)
        fw_prefix = sysconfig.get_config_var("PYTHONFRAMEWORKPREFIX")
        if isinstance(fw_prefix, str) and fw_prefix and os.path.isdir(fw_prefix):
            fw_path = os.path.normpath(
                os.path.join(fw_prefix, "lib", version_dir)
            )
            if os.path.isdir(fw_path):
                found.add(fw_path)
                logger.debug(f"Found stdlib path (Framework): {fw_path}")

        # Last resort: use the directories of two well-known stdlib modules.
        if not found:
            logger.warning("Sysconfig paths failed, attempting fallback using module locations.")
            fallback_modules = (
                (os, "Could not determine path for 'os' module."),
                (sysconfig, "Could not determine path for 'sysconfig' module."),
            )
            for module, failure_message in fallback_modules:
                try:
                    found.add(os.path.normpath(os.path.dirname(module.__file__)))
                except (AttributeError, TypeError):
                    logger.error(failure_message)

    except Exception as e:
        logger.exception(f"Unexpected error while determining standard library paths: {e}")

    # Drop any empty entries before caching.
    _CACHED_STD_LIB_PATHS = {entry for entry in found if entry}

    if _CACHED_STD_LIB_PATHS:
        logger.debug(f"Final cached standard library paths: {_CACHED_STD_LIB_PATHS}")
    else:
        logger.error("Failed to determine ANY standard library paths. Path-based checks will fail.")

    return _CACHED_STD_LIB_PATHS
|
|
|
|
|
|
def is_path_in_standard_library(file_path_str: Optional[str]) -> bool:
    """
    Check whether a file path lies inside a known standard library directory.

    The path is made absolute and normalized, then compared against every
    directory returned by ``get_standard_library_paths()``. Both sides are
    passed through ``os.path.normcase`` so the comparison is case-insensitive
    on Windows, where the same directory may be reported as ``Lib`` in one
    place and ``lib`` in another. On other platforms ``normcase`` leaves the
    path unchanged.

    Args:
        file_path_str: Path of the file to classify; ``None`` or an empty
            string is treated as "not in the standard library".

    Returns:
        True if the path is strictly below a standard library directory and
        no path component is ``site-packages`` or ``dist-packages``;
        False otherwise, including when the path cannot be processed.
    """
    if not file_path_str:
        return False

    std_lib_paths = get_standard_library_paths()
    if not std_lib_paths:
        return False

    try:
        norm_file_path = os.path.normcase(
            os.path.normpath(os.path.abspath(file_path_str))
        )

        # Third-party packages can live below a stdlib directory
        # (e.g. <stdlib>/site-packages), so rule them out first.
        path_parts = Path(norm_file_path).parts
        if "site-packages" in path_parts or "dist-packages" in path_parts:
            return False

        # Appending the separator prevents "/usr/lib/python3.1" from matching
        # files under "/usr/lib/python3.11".
        return any(
            norm_file_path.startswith(os.path.normcase(std_path) + os.sep)
            for std_path in std_lib_paths
        )
    except Exception as e:
        # Best effort: an unusable path is simply not a stdlib path.
        logger.warning(f"Error during path comparison for '{file_path_str}': {e}")
        return False
|
|
|
|
|
|
def is_standard_library(module_name: str) -> bool:
    """
    Check whether a module name belongs to the standard library.

    Classification is based on the *top-level* package of ``module_name``
    (``"xml"`` for ``"xml.etree.ElementTree"``), because a submodule is part of
    the standard library exactly when its top-level package is. Resolving only
    the top-level name also keeps ``importlib.util.find_spec`` from importing
    parent packages, which it does for dotted names and which would execute
    third-party code during analysis.

    Strategies, in order:

    1. Look the top-level name up in ``STANDARD_LIBRARY_MODULES``.
    2. Find the top-level module's spec; ``built-in`` and ``frozen`` origins
       count as standard library.
    3. If the origin is a file path, check whether it lies in a standard
       library directory.

    Args:
        module_name: Absolute module name, optionally dotted.

    Returns:
        True if the module is classified as standard library. False for
        anything else, including empty, non-string or relative (leading dot)
        names and modules that cannot be located.
    """
    if not isinstance(module_name, str) or not module_name:
        return False

    top_level = module_name.partition(".")[0]
    if not top_level:
        # A leading dot means a relative import, which cannot be resolved
        # without a package context and never refers to the stdlib.
        logger.debug(f"'{module_name}' is a relative module name. Assuming non-standard.")
        return False

    # Strategy 1: Check against the reliable name list
    if top_level in STANDARD_LIBRARY_MODULES:
        logger.debug(f"'{module_name}' is in the standard library name set.")
        return True

    # Strategy 2: Use importlib to find the top-level module's specification
    try:
        spec = importlib.util.find_spec(top_level)
    except Exception as e:
        # find_spec can raise ValueError (e.g. a module whose __spec__ is None)
        # or whatever a custom finder raises; none of that should abort analysis.
        logger.debug(
            f"Could not find spec for '{module_name}' (error: {e}). Assuming it's non-standard."
        )
        return False

    if spec is None:
        logger.debug(f"No spec found for '{module_name}'. Assuming non-standard.")
        return False

    origin = spec.origin
    logger.debug(f"Module '{module_name}' has origin: '{origin}'")

    # 'built-in' or 'frozen' modules are part of the standard library
    if origin in ("built-in", "frozen"):
        logger.debug(f"'{module_name}' is a '{origin}' module.")
        return True

    # Strategy 3: Check if the module's file path is in a stdlib directory
    if is_path_in_standard_library(origin):
        logger.debug(f"Path for '{module_name}' ('{origin}') is a standard library path.")
        return True

    logger.debug(f"'{module_name}' is not classified as standard library.")
    return False