From 26d37fbd72c323072f02339101585703641e2626 Mon Sep 17 00:00:00 2001
From: VALLONGOL
Date: Tue, 6 May 2025 14:37:18 +0200
Subject: [PATCH] update version

---
 dependencyanalyzer/core.py | 102 ++++++++++++++++++++++++-------
 dependencyanalyzer/gui.py  |  55 ++++++++++++++++----
 2 files changed, 114 insertions(+), 43 deletions(-)

diff --git a/dependencyanalyzer/core.py b/dependencyanalyzer/core.py
index e61c090..4990324 100644
--- a/dependencyanalyzer/core.py
+++ b/dependencyanalyzer/core.py
@@ -145,65 +145,99 @@ class ImportExtractor(ast.NodeVisitor):
         if node.module and node.level == 0:
             module_name = node.module.split('.')[0];
             if module_name: self.imported_modules.add((module_name, self.file_path_str))
+
+
 DependencyInfo = Dict[str, Dict[str, Union[Set[str], Optional[str], str]]]
 
-def find_project_modules_and_dependencies(repo_path: Path) -> Tuple[DependencyInfo, DependencyInfo]:
-    """Analyzes Python files, returns info on standard and external dependencies."""
-    all_imports_locations: Dict[str, Set[str]] = {}; project_modules: Set[str] = set()
-    logger.info(f"Starting analysis: Identifying project modules in '{repo_path}'...")
-    try:  # Simple project module identification
-        for item in repo_path.rglob('*'):
-            if item.is_dir() and (item / '__init__.py').exists():
-                try:
-                    rel_dir = item.relative_to(repo_path);
-                    if rel_dir.parts: project_modules.add(rel_dir.parts[0])
-                except ValueError: pass
-            elif item.is_file() and item.suffix == '.py' and item.name != '__init__.py':
-                try:
-                    rel_file = item.relative_to(repo_path)
-                    if len(rel_file.parts) == 1: project_modules.add(item.stem)
-                    elif len(rel_file.parts) > 1 and (item.parent / '__init__.py').exists(): project_modules.add(rel_file.parts[0])
-                except ValueError: pass
-    except Exception as e: logger.error(f"Error identifying project modules: {e}")
-    logger.debug(f"Potential project modules: {project_modules}")
+def find_project_modules_and_dependencies(
+        repo_path: Path,  # Original user-selected path
+        scan_path: Path   # Path where the actual scanning begins
+    ) -> Tuple[DependencyInfo, DependencyInfo]:
+    """
+    Analyzes Python files starting from scan_path, identifies project modules
+    relative to scan_path, and finds standard/external dependencies.
+    Explicitly ignores imports matching the name of the scan_path directory
+    when scan_path differs from repo_path (assuming it is the main package).
 
-    logger.info(f"Analyzing Python files for imports...")
+    Args:
+        repo_path (Path): The root path selected by the user.
+        scan_path (Path): The directory to actually scan for source code.
+
+    Returns:
+        Tuple[DependencyInfo, DependencyInfo]: std_lib_info, external_deps_info
+    """
+    all_imports_locations: Dict[str, Set[str]] = {}
+    # project_modules: Set[str] = set()  # Identifying project modules is complex; rely on the scan_path name instead
+
+    # --- NEW LOGIC: identify the main package name (when scanning a sub-folder) ---
+    main_project_package_name: Optional[str] = None
+    if repo_path != scan_path and scan_path.name == repo_path.name.lower():
+        main_project_package_name = scan_path.name
+        logger.info(f"Assuming '{main_project_package_name}' is the main project package being scanned.")
+    # --- END NEW LOGIC ---
+
+    # Identifying potential project modules *within* scan_path can still be useful,
+    # but the primary check is based on main_project_package_name for now.
+    # logger.info(f"Analysis target: Identifying project modules within '{scan_path}'...")
+    # ... (previous logic for project_modules identification removed for simplification;
+    #     it could be added back if needed for more complex internal structures)
+
+    logger.info(f"Analyzing Python files for imports starting from '{scan_path}'...")
     excluded_dirs = {'venv', '.venv', 'env', '.env', 'docs', 'tests', 'test', 'site-packages', 'dist-packages', '__pycache__', '.git', '.hg', '.svn', '.tox', '.nox', 'build', 'dist', '*.egg-info'}
     file_count = 0
-    for root, dirs, files in os.walk(repo_path, topdown=True):
+    for root, dirs, files in os.walk(scan_path, topdown=True):
         dirs[:] = [d for d in dirs if d not in excluded_dirs and not d.startswith('.')]
+        current_root_path = Path(root)
         for file_name in files:
             if file_name.endswith(".py"):
-                file_path = Path(root) / file_name; file_count += 1
-                try: rel_path_str = str(file_path.relative_to(repo_path))
-                except ValueError: rel_path_str = str(file_path); logger.warning(f"Path not relative: {file_path}")
+                file_path_obj = current_root_path / file_name; file_count += 1
+                try: report_rel_path_str = str(file_path_obj.relative_to(repo_path))
+                except ValueError: report_rel_path_str = str(file_path_obj); logger.warning(f"Path not relative: {file_path_obj}")
+                logger.debug(f"Parsing: {report_rel_path_str}")
                 try:
-                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: source = f.read()
-                    tree = ast.parse(source, filename=str(file_path))
-                    extractor = ImportExtractor(file_path_str=rel_path_str); extractor.visit(tree)
-                    for module, rel_path in extractor.imported_modules:
-                        if module: all_imports_locations.setdefault(module, set()).add(rel_path)
-                except SyntaxError as e: logger.warning(f"Syntax error in '{rel_path_str}': {e}. Skipping.")
-                except Exception as e: logger.exception(f"Error processing file '{rel_path_str}': {e}")
+                    with open(file_path_obj, 'r', encoding='utf-8', errors='ignore') as f: source = f.read()
+                    tree = ast.parse(source, filename=str(file_path_obj))
+                    extractor = ImportExtractor(file_path_str=report_rel_path_str)
+                    extractor.visit(tree)
+                    for module, report_rel_path in extractor.imported_modules:
+                        if module: all_imports_locations.setdefault(module, set()).add(report_rel_path)
+                except SyntaxError as e: logger.warning(f"Syntax error in '{report_rel_path_str}': {e}. Skipping.")
+                except Exception as e: logger.exception(f"Error processing file '{report_rel_path_str}': {e}")
+
     logger.info(f"Analyzed {file_count} Python files. Found {len(all_imports_locations)} unique top-level imports.")
-
     logger.info("Classifying imports and fetching versions...")
     std_libs: DependencyInfo = {}; external_deps: DependencyInfo = {}
+
     for imp_module, locs in all_imports_locations.items():
-        if imp_module in project_modules: logger.debug(f"Skipping project module: '{imp_module}'"); continue
+        # --- MODIFIED CHECK: ignore the import when it matches the main project package name ---
+        if main_project_package_name and imp_module == main_project_package_name:
+            logger.info(f"Skipping '{imp_module}' as it matches the main project package name being scanned.")
+            continue
+        # --- END MODIFIED CHECK ---
+
+        # The check against the more complex 'project_modules' set is removed for now.
+        # if imp_module in project_modules: logger.debug(f"Skipping project module: '{imp_module}'"); continue
+
         if imp_module in FALSE_POSITIVE_EXTERNAL_MODULES: logger.info(f"Skipping known false positive: '{imp_module}'"); continue
-        if _is_standard_library(imp_module): logger.debug(f"'{imp_module}' is standard."); std_libs[imp_module] = {'locations': locs, 'version': None}
+
+        if _is_standard_library(imp_module):
+            logger.debug(f"'{imp_module}' is standard library.")
+            std_libs[imp_module] = {'locations': locs, 'version': None}
         else:
+            # External dependency processing (name mapping, version lookup)
             pypi_name = MODULE_NAME_TO_PACKAGE_NAME_MAP.get(imp_module, imp_module)
             orig_imp = imp_module if pypi_name != imp_module else None
             logger.debug(f"'{imp_module}' (PyPI: '{pypi_name}') is external. Fetching version...")
             version: Optional[str] = None
             try: version = importlib.metadata.version(pypi_name)
             except: logger.warning(f"Version for '{pypi_name}' not found.")
+
             dep_data = external_deps.setdefault(pypi_name, {'locations': set(), 'version': version, 'original_import_name': None})
             dep_data['locations'].update(locs); # type: ignore
             if orig_imp and dep_data.get('original_import_name') is None: dep_data['original_import_name'] = orig_imp
             if dep_data.get('version') is None and version is not None: dep_data['version'] = version
+
     logger.info(f"Classification complete: {len(std_libs)} stdlib used, {len(external_deps)} unique external dependencies.")
     return std_libs, external_deps
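
Reviewer note (not part of the patch): a minimal sketch of the new self-import filter
in isolation. The repository name, layout, and import list are illustrative
assumptions; the condition itself mirrors the new core.py logic above.

    from pathlib import Path

    # Hypothetical layout: the user selects 'MyProject/', and the GUI resolves
    # the scan path to the 'myproject/' package directory inside it.
    repo_path = Path("/home/user/MyProject")
    scan_path = repo_path / repo_path.name.lower()   # .../MyProject/myproject

    # Same heuristic as the patched find_project_modules_and_dependencies():
    # when scanning a sub-folder named after the lowercased repository, treat
    # that name as the project's own package rather than a dependency.
    main_project_package_name = None
    if repo_path != scan_path and scan_path.name == repo_path.name.lower():
        main_project_package_name = scan_path.name   # 'myproject'

    for imp_module in ["myproject", "requests", "os"]:
        if main_project_package_name and imp_module == main_project_package_name:
            print(f"skipped self-import: {imp_module}")   # not reported as a dependency
        else:
            print(f"classified further: {imp_module}")    # stdlib or external
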
+ """ if not self.selected_repository_path: - raise ValueError("Repository path not selected.") - # core functions log internally using their logger (which propagates to root) - core.find_main_script(self.selected_repository_path) - std_lib_info, external_info = core.find_project_modules_and_dependencies(self.selected_repository_path) - req_file_path = core.generate_requirements_file(self.selected_repository_path, external_info, std_lib_info) + # This should ideally not be reached if called correctly, but added safeguard + raise ValueError("Repository path is not selected when analysis task started.") + + repo_path = self.selected_repository_path + repo_name_lower = repo_path.name.lower() + potential_sub_package_path = repo_path / repo_name_lower + + scan_path: Path # Define the path to start scanning from + + # Check if a sub-directory with the lowercased repo name exists + if potential_sub_package_path.is_dir(): + logging.info(f"Found sub-directory '{repo_name_lower}', scanning within it.") + scan_path = potential_sub_package_path + else: + logging.info(f"Sub-directory '{repo_name_lower}' not found, scanning the selected repository root '{repo_path}'.") + scan_path = repo_path + + # Optional: Find main script (less critical now but kept for potential info) + # This logic might also need adjustment based on scan_path vs repo_path if needed elsewhere + core.find_main_script(repo_path) # Still checks relative to repo_path + + # Call the core analysis function with both repo_path and scan_path + std_lib_info, external_info = core.find_project_modules_and_dependencies( + repo_path=repo_path, # Pass original root for relative paths + scan_path=scan_path # Pass the determined path to actually scan + ) + + # Generate requirements file (uses repo_path for the output file location) + req_file_path = core.generate_requirements_file( + repo_path, # Output in the root selected by user + external_info, + std_lib_info + ) + + # Return all necessary results for the callback return req_file_path, std_lib_info, external_info def _analysis_and_generation_callback(self, result: Tuple[Path, core.DependencyInfo, core.DependencyInfo]) -> None: @@ -287,7 +324,7 @@ class DependencyAnalyzerApp(tk.Frame): self.std_lib_deps_info = std_lib_info self.external_deps_info = external_info self.extracted_dependencies_names = set(self.external_deps_info.keys()) - logging.info(f"Analysis and requirements.txt generation complete. File: {self.requirements_file_path}") # Log via root + logging.info(f"Analysis and requirements generation complete. File: {self.requirements_file_path}") # Log via root self._populate_modules_tree() def _populate_modules_tree(self) -> None: