From 5a1063482a30fb072d26ca62f86df5a16b57fe18 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 10:46:59 -0800 Subject: [PATCH 1/7] Add analysis modules for code health and technical debt tracking --- .github/workflows/code-analysis.yml | 62 +++++ .gitignore | 5 +- analysis/__init__.py | 4 + analysis/complexity_analysis.py | 345 +++++++++++++++++++++++ analysis/debt_indicators.py | 418 ++++++++++++++++++++++++++++ analysis/dependency_analysis.py | 388 ++++++++++++++++++++++++++ analysis/git_analysis.py | 391 ++++++++++++++++++++++++++ analysis/pyproject.toml | 20 ++ analysis/snapshot.py | 288 +++++++++++++++++++ 9 files changed, 1920 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/code-analysis.yml create mode 100644 analysis/__init__.py create mode 100644 analysis/complexity_analysis.py create mode 100644 analysis/debt_indicators.py create mode 100644 analysis/dependency_analysis.py create mode 100644 analysis/git_analysis.py create mode 100644 analysis/pyproject.toml create mode 100644 analysis/snapshot.py diff --git a/.github/workflows/code-analysis.yml b/.github/workflows/code-analysis.yml new file mode 100644 index 00000000..c74fea52 --- /dev/null +++ b/.github/workflows/code-analysis.yml @@ -0,0 +1,62 @@ +name: Code Health Analysis + +on: + push: + branches: + - 'main' + workflow_dispatch: # Allow manual trigger + +jobs: + generate-snapshot: + name: Generate Code Health Snapshot + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Full history for git analysis + + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install analysis dependencies + working-directory: analysis + run: | + uv pip install --system -r pyproject.toml + + - name: Generate snapshot + working-directory: analysis + run: | + python snapshot.py --output ../analysis-snapshot.json + + - name: Upload snapshot artifact + uses: actions/upload-artifact@v4 + with: + name: code-health-snapshot-${{ github.sha }} + path: analysis-snapshot.json + retention-days: 90 + if-no-files-found: error + + - name: Print summary + run: | + echo "## Code Health Snapshot Generated" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Commit:** \`${{ github.sha }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Summary Metrics" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + jq -r '.summary | to_entries | .[] | "- **\(.key):** \(.value)"' analysis-snapshot.json >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Top Priority Hotspots" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| File | Changes | Complexity | Priority Score |" >> $GITHUB_STEP_SUMMARY + echo "|------|---------|------------|----------------|" >> $GITHUB_STEP_SUMMARY + jq -r '.priority_hotspots[:5] | .[] | "| \(.path) | \(.change_count) | \(.max_complexity) | \(.priority_score) |"' analysis-snapshot.json >> $GITHUB_STEP_SUMMARY diff --git a/.gitignore b/.gitignore index b9494517..b5063653 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,7 @@ node_modules **/__pycache__/ # Folder for storing AI generated artifacts -ai-artifacts/* \ No newline at end of file +ai-artifacts/* + +# Code health analysis snapshots (generated, uploaded as artifacts) +analysis-snapshot.json \ No newline at end of file diff --git 
a/analysis/__init__.py b/analysis/__init__.py new file mode 100644 index 00000000..457aa493 --- /dev/null +++ b/analysis/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Code health analysis tools for tracking technical debt.""" diff --git a/analysis/complexity_analysis.py b/analysis/complexity_analysis.py new file mode 100644 index 00000000..523c39d7 --- /dev/null +++ b/analysis/complexity_analysis.py @@ -0,0 +1,345 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Static code complexity analysis using radon for Python and regex patterns for TypeScript.""" + +import pathlib +import re +from dataclasses import dataclass +from typing import Dict, List, Optional + +import pathspec + +# Radon is optional - graceful fallback if not available +try: + from radon.complexity import cc_visit + from radon.metrics import mi_visit + + RADON_AVAILABLE = True +except ImportError: + RADON_AVAILABLE = False + + +@dataclass +class FunctionComplexity: + """Complexity metrics for a single function/method.""" + + name: str + line: int + complexity: int + length: int # lines of code + + def to_dict(self) -> dict: + return { + "name": self.name, + "line": self.line, + "complexity": self.complexity, + "length": self.length, + } + + +@dataclass +class FileComplexity: + """Complexity metrics for a file.""" + + path: str + total_lines: int + code_lines: int + functions: List[FunctionComplexity] + max_complexity: int + avg_complexity: float + maintainability_index: Optional[float] = None + + def to_dict(self) -> dict: + return { + "path": self.path, + "total_lines": self.total_lines, + "code_lines": self.code_lines, + "function_count": len(self.functions), + "max_complexity": self.max_complexity, + "avg_complexity": round(self.avg_complexity, 2), + "maintainability_index": round(self.maintainability_index, 2) + if self.maintainability_index + else None, + "functions": [f.to_dict() for f in self.functions], + } + + +def load_gitignore(repo_root: pathlib.Path) -> Optional[pathspec.PathSpec]: + """Load .gitignore patterns.""" + gitignore_path = repo_root / ".gitignore" + if not gitignore_path.exists(): + return None + + with open(gitignore_path, "r", encoding="utf-8") as f: + patterns = f.read().splitlines() + + return pathspec.PathSpec.from_lines("gitwildmatch", patterns) + + +def should_analyze_file( + filepath: pathlib.Path, + repo_root: pathlib.Path, + gitignore: Optional[pathspec.PathSpec], +) -> bool: + """Check if a file should be analyzed.""" + rel_path = filepath.relative_to(repo_root).as_posix() + + # Skip common non-source directories + skip_dirs = { + "node_modules", + "dist", + "out", + ".venv", + "__pycache__", + ".git", + ".vscode-test", + } + for part in filepath.parts: + if part in skip_dirs: + return False + + # Skip if matched by gitignore + if gitignore and gitignore.match_file(rel_path): + return False + + return True + + +def analyze_python_file( + filepath: pathlib.Path, repo_root: pathlib.Path +) -> Optional[FileComplexity]: + """Analyze a Python file for complexity metrics.""" + if not RADON_AVAILABLE: + return None + + try: + content = filepath.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + return None + + lines = content.splitlines() + total_lines = len(lines) + code_lines = sum( + 1 for line in lines if line.strip() and not line.strip().startswith("#") + ) + + try: + cc_results = cc_visit(content) + mi_score = mi_visit(content, 
multi=False) + except SyntaxError: + return None + + functions = [] + for block in cc_results: + # radon returns different block types (Function, Class, etc.) + func = FunctionComplexity( + name=block.name, + line=block.lineno, + complexity=block.complexity, + length=block.endline - block.lineno + 1 if hasattr(block, "endline") else 0, + ) + functions.append(func) + + max_cc = max((f.complexity for f in functions), default=0) + avg_cc = sum(f.complexity for f in functions) / len(functions) if functions else 0 + + rel_path = filepath.relative_to(repo_root).as_posix() + return FileComplexity( + path=rel_path, + total_lines=total_lines, + code_lines=code_lines, + functions=functions, + max_complexity=max_cc, + avg_complexity=avg_cc, + maintainability_index=mi_score, + ) + + +def analyze_typescript_file( + filepath: pathlib.Path, repo_root: pathlib.Path +) -> Optional[FileComplexity]: + """Analyze a TypeScript file for complexity metrics using regex patterns. + + This is a simplified analysis - for accurate TypeScript complexity, + consider using ts-morph or typescript compiler API. + """ + try: + content = filepath.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + return None + + lines = content.splitlines() + total_lines = len(lines) + code_lines = sum( + 1 for line in lines if line.strip() and not line.strip().startswith("//") + ) + + # Find function/method definitions + # Matches: function name, async function name, methodName(, async methodName( + function_pattern = re.compile( + r"^\s*(?:export\s+)?(?:async\s+)?(?:function\s+(\w+)|(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*(?::\s*[^{]+)?\s*\{)", + re.MULTILINE, + ) + + # Complexity indicators (simplified cyclomatic complexity estimation) + branch_patterns = [ + r"\bif\s*\(", + r"\belse\s+if\s*\(", + r"\belse\s*\{", + r"\bfor\s*\(", + r"\bwhile\s*\(", + r"\bswitch\s*\(", + r"\bcase\s+", + r"\bcatch\s*\(", + r"\b\?\s*[^:]+\s*:", # ternary + r"\?\?", # nullish coalescing + r"\|\|", # logical or + r"&&", # logical and + ] + + functions = [] + func_matches = list(function_pattern.finditer(content)) + + for i, match in enumerate(func_matches): + func_name = match.group(1) or match.group(2) or "anonymous" + start_line = content[: match.start()].count("\n") + 1 + + # Find function end (rough estimate - count braces) + func_start = match.end() + func_end = len(content) + + if i + 1 < len(func_matches): + func_end = func_matches[i + 1].start() + + func_content = content[match.start() : func_end] + + # Count complexity + complexity = 1 # Base complexity + for pattern in branch_patterns: + complexity += len(re.findall(pattern, func_content)) + + length = func_content.count("\n") + 1 + + functions.append( + FunctionComplexity( + name=func_name, + line=start_line, + complexity=complexity, + length=length, + ) + ) + + max_cc = max((f.complexity for f in functions), default=0) + avg_cc = sum(f.complexity for f in functions) / len(functions) if functions else 0 + + rel_path = filepath.relative_to(repo_root).as_posix() + return FileComplexity( + path=rel_path, + total_lines=total_lines, + code_lines=code_lines, + functions=functions, + max_complexity=max_cc, + avg_complexity=avg_cc, + maintainability_index=None, # Not computed for TypeScript + ) + + +def find_source_files( + repo_root: pathlib.Path, extensions: List[str] +) -> List[pathlib.Path]: + """Find all source files with given extensions.""" + gitignore = load_gitignore(repo_root) + files = [] + + for ext in extensions: + for filepath in repo_root.rglob(f"*{ext}"): + if 
should_analyze_file(filepath, repo_root, gitignore): + files.append(filepath) + + return files + + +def analyze_complexity(repo_root: pathlib.Path) -> dict: + """Run complexity analysis on the repository. + + Returns: + Dictionary with complexity metrics for all analyzed files + """ + results: Dict[str, List[dict]] = { + "python": [], + "typescript": [], + } + + # Analyze Python files + python_files = find_source_files(repo_root, [".py"]) + for filepath in python_files: + file_complexity = analyze_python_file(filepath, repo_root) + if file_complexity: + results["python"].append(file_complexity.to_dict()) + + # Analyze TypeScript files + ts_files = find_source_files(repo_root, [".ts"]) + for filepath in ts_files: + file_complexity = analyze_typescript_file(filepath, repo_root) + if file_complexity: + results["typescript"].append(file_complexity.to_dict()) + + # Compute summary statistics + all_files = results["python"] + results["typescript"] + + summary = { + "total_files": len(all_files), + "total_functions": sum(f["function_count"] for f in all_files), + "total_lines": sum(f["total_lines"] for f in all_files), + "total_code_lines": sum(f["code_lines"] for f in all_files), + "files_with_high_complexity": [ + f["path"] for f in all_files if f["max_complexity"] > 10 + ], + "avg_file_complexity": round( + sum(f["avg_complexity"] for f in all_files) / len(all_files), 2 + ) + if all_files + else 0, + } + + # Sort files by max complexity (most complex first) + results["python"].sort(key=lambda f: f["max_complexity"], reverse=True) + results["typescript"].sort(key=lambda f: f["max_complexity"], reverse=True) + + return { + "by_language": results, + "summary": summary, + "high_complexity_functions": _get_high_complexity_functions( + all_files, threshold=10 + ), + } + + +def _get_high_complexity_functions( + files: List[dict], threshold: int = 10 +) -> List[dict]: + """Extract functions with complexity above threshold.""" + high_cc = [] + for file_data in files: + for func in file_data.get("functions", []): + if func["complexity"] > threshold: + high_cc.append( + { + "file": file_data["path"], + "function": func["name"], + "line": func["line"], + "complexity": func["complexity"], + } + ) + + high_cc.sort(key=lambda f: f["complexity"], reverse=True) + return high_cc[:30] # Top 30 + + +if __name__ == "__main__": + import json + + repo = pathlib.Path(__file__).parent.parent + result = analyze_complexity(repo) + print(json.dumps(result, indent=2)) diff --git a/analysis/debt_indicators.py b/analysis/debt_indicators.py new file mode 100644 index 00000000..dc9b3e98 --- /dev/null +++ b/analysis/debt_indicators.py @@ -0,0 +1,418 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Technical debt indicator detection. + +Scans code for common debt markers like TODO comments, large files, +long functions, and code smells. +""" + +import pathlib +import re +from dataclasses import dataclass +from typing import Dict, List, Optional + +import pathspec + + +@dataclass +class DebtMarker: + """A technical debt marker found in code.""" + + file: str + line: int + marker_type: str # TODO, FIXME, HACK, XXX, etc. 
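+    # Raw comment text following the marker (truncated to 200 chars in to_dict)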
+ text: str + + def to_dict(self) -> dict: + return { + "file": self.file, + "line": self.line, + "type": self.marker_type, + "text": self.text[:200], # Truncate long comments + } + + +@dataclass +class LargeFile: + """A file that exceeds size thresholds.""" + + path: str + total_lines: int + code_lines: int + + def to_dict(self) -> dict: + return { + "path": self.path, + "total_lines": self.total_lines, + "code_lines": self.code_lines, + } + + +@dataclass +class LongFunction: + """A function that exceeds length thresholds.""" + + file: str + function_name: str + line: int + length: int + + def to_dict(self) -> dict: + return { + "file": self.file, + "function": self.function_name, + "line": self.line, + "length": self.length, + } + + +# Debt marker patterns +DEBT_PATTERNS = [ + (r"#\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:|\s](.+)$", "python"), + ( + r"//\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:|\s](.+)$", + "typescript", + ), + ( + r"/\*\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:|\s](.+?)\*/", + "typescript", + ), +] + +# Thresholds (configurable) +LARGE_FILE_THRESHOLD = 500 # lines of code +LONG_FUNCTION_THRESHOLD = 50 # lines + + +def load_gitignore(repo_root: pathlib.Path) -> Optional[pathspec.PathSpec]: + """Load .gitignore patterns.""" + gitignore_path = repo_root / ".gitignore" + if not gitignore_path.exists(): + return None + + with open(gitignore_path, "r", encoding="utf-8") as f: + patterns = f.read().splitlines() + + return pathspec.PathSpec.from_lines("gitwildmatch", patterns) + + +def should_analyze_file( + filepath: pathlib.Path, + repo_root: pathlib.Path, + gitignore: Optional[pathspec.PathSpec], +) -> bool: + """Check if a file should be analyzed.""" + rel_path = filepath.relative_to(repo_root).as_posix() + + # Skip common non-source directories + skip_dirs = { + "node_modules", + "dist", + "out", + ".venv", + "__pycache__", + ".git", + ".vscode-test", + } + for part in filepath.parts: + if part in skip_dirs: + return False + + # Skip if matched by gitignore + if gitignore and gitignore.match_file(rel_path): + return False + + return True + + +def find_debt_markers( + filepath: pathlib.Path, repo_root: pathlib.Path +) -> List[DebtMarker]: + """Find TODO/FIXME/HACK markers in a file.""" + try: + content = filepath.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + return [] + + rel_path = filepath.relative_to(repo_root).as_posix() + markers = [] + + # Determine file type + suffix = filepath.suffix.lower() + if suffix == ".py": + file_type = "python" + elif suffix in {".ts", ".js", ".tsx", ".jsx"}: + file_type = "typescript" + else: + return [] + + for pattern, pattern_type in DEBT_PATTERNS: + if pattern_type != file_type: + continue + + for line_num, line in enumerate(content.splitlines(), start=1): + match = re.search(pattern, line, re.IGNORECASE) + if match: + marker_type = match.group(1).upper() + text = match.group(2).strip() if match.lastindex >= 2 else "" + markers.append( + DebtMarker( + file=rel_path, + line=line_num, + marker_type=marker_type, + text=text, + ) + ) + + return markers + + +def analyze_file_size( + filepath: pathlib.Path, repo_root: pathlib.Path +) -> Optional[LargeFile]: + """Check if a file exceeds size thresholds.""" + try: + content = filepath.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + return None + + lines = content.splitlines() + total_lines = len(lines) + + # Count code lines (non-empty, non-comment) + suffix = filepath.suffix.lower() + if suffix == ".py": + 
code_lines = sum( + 1 for line in lines if line.strip() and not line.strip().startswith("#") + ) + elif suffix in {".ts", ".js", ".tsx", ".jsx"}: + code_lines = sum( + 1 for line in lines if line.strip() and not line.strip().startswith("//") + ) + else: + code_lines = total_lines + + if code_lines > LARGE_FILE_THRESHOLD: + rel_path = filepath.relative_to(repo_root).as_posix() + return LargeFile(path=rel_path, total_lines=total_lines, code_lines=code_lines) + + return None + + +def find_long_functions_python( + filepath: pathlib.Path, repo_root: pathlib.Path +) -> List[LongFunction]: + """Find Python functions that exceed length threshold.""" + try: + content = filepath.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + return [] + + rel_path = filepath.relative_to(repo_root).as_posix() + lines = content.splitlines() + long_funcs = [] + + # Simple pattern to find function definitions + func_pattern = re.compile(r"^(\s*)(?:async\s+)?def\s+(\w+)\s*\(") + + current_func = None + current_indent = 0 + func_start = 0 + + for i, line in enumerate(lines): + match = func_pattern.match(line) + if match: + # Check previous function + if current_func: + length = i - func_start + if length > LONG_FUNCTION_THRESHOLD: + long_funcs.append( + LongFunction( + file=rel_path, + function_name=current_func, + line=func_start + 1, + length=length, + ) + ) + + current_indent = len(match.group(1)) + current_func = match.group(2) + func_start = i + + # Detect when we've left the current function (dedent to same or less level) + elif current_func and line.strip(): + line_indent = len(line) - len(line.lstrip()) + if line_indent <= current_indent and not line.strip().startswith("#"): + # End of function + length = i - func_start + if length > LONG_FUNCTION_THRESHOLD: + long_funcs.append( + LongFunction( + file=rel_path, + function_name=current_func, + line=func_start + 1, + length=length, + ) + ) + current_func = None + + # Check last function + if current_func: + length = len(lines) - func_start + if length > LONG_FUNCTION_THRESHOLD: + long_funcs.append( + LongFunction( + file=rel_path, + function_name=current_func, + line=func_start + 1, + length=length, + ) + ) + + return long_funcs + + +def find_long_functions_typescript( + filepath: pathlib.Path, repo_root: pathlib.Path +) -> List[LongFunction]: + """Find TypeScript functions that exceed length threshold.""" + try: + content = filepath.read_text(encoding="utf-8") + except (UnicodeDecodeError, OSError): + return [] + + rel_path = filepath.relative_to(repo_root).as_posix() + lines = content.splitlines() + long_funcs = [] + + # Simplified pattern for function definitions + func_pattern = re.compile( + r"^\s*(?:export\s+)?(?:async\s+)?(?:function\s+(\w+)|(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*(?::\s*[^{]+)?\s*\{)" + ) + + i = 0 + while i < len(lines): + match = func_pattern.match(lines[i]) + if match: + func_name = match.group(1) or match.group(2) or "anonymous" + func_start = i + + # Count braces to find function end + brace_count = 0 + found_open = False + + for j in range(i, len(lines)): + line = lines[j] + for char in line: + if char == "{": + brace_count += 1 + found_open = True + elif char == "}": + brace_count -= 1 + + if found_open and brace_count == 0: + length = j - func_start + 1 + if length > LONG_FUNCTION_THRESHOLD: + long_funcs.append( + LongFunction( + file=rel_path, + function_name=func_name, + line=func_start + 1, + length=length, + ) + ) + i = j + break + else: + # Reached end without closing brace + break + + i += 1 + + return 
long_funcs + + +def analyze_debt(repo_root: pathlib.Path) -> dict: + """Run complete debt indicator analysis. + + Returns: + Dictionary with all debt indicators + """ + gitignore = load_gitignore(repo_root) + + all_markers: List[DebtMarker] = [] + large_files: List[LargeFile] = [] + long_functions: List[LongFunction] = [] + + # Find all source files + extensions = [".py", ".ts", ".js", ".tsx", ".jsx"] + source_files = [] + + for ext in extensions: + for filepath in repo_root.rglob(f"*{ext}"): + if should_analyze_file(filepath, repo_root, gitignore): + source_files.append(filepath) + + # Analyze each file + for filepath in source_files: + # Find debt markers + markers = find_debt_markers(filepath, repo_root) + all_markers.extend(markers) + + # Check file size + large_file = analyze_file_size(filepath, repo_root) + if large_file: + large_files.append(large_file) + + # Find long functions + suffix = filepath.suffix.lower() + if suffix == ".py": + long_funcs = find_long_functions_python(filepath, repo_root) + elif suffix in {".ts", ".js", ".tsx", ".jsx"}: + long_funcs = find_long_functions_typescript(filepath, repo_root) + else: + long_funcs = [] + + long_functions.extend(long_funcs) + + # Group markers by type + markers_by_type: Dict[str, List[dict]] = {} + for marker in all_markers: + if marker.marker_type not in markers_by_type: + markers_by_type[marker.marker_type] = [] + markers_by_type[marker.marker_type].append(marker.to_dict()) + + # Sort large files and long functions + large_files.sort(key=lambda f: f.code_lines, reverse=True) + long_functions.sort(key=lambda f: f.length, reverse=True) + + return { + "debt_markers": { + "by_type": markers_by_type, + "total_count": len(all_markers), + "summary": { + marker_type: len(markers) + for marker_type, markers in markers_by_type.items() + }, + }, + "large_files": { + "files": [f.to_dict() for f in large_files], + "count": len(large_files), + "threshold": LARGE_FILE_THRESHOLD, + }, + "long_functions": { + "functions": [f.to_dict() for f in long_functions[:50]], # Top 50 + "count": len(long_functions), + "threshold": LONG_FUNCTION_THRESHOLD, + }, + "files_analyzed": len(source_files), + } + + +if __name__ == "__main__": + import json + + repo = pathlib.Path(__file__).parent.parent + result = analyze_debt(repo) + print(json.dumps(result, indent=2)) diff --git a/analysis/dependency_analysis.py b/analysis/dependency_analysis.py new file mode 100644 index 00000000..90bb5b4c --- /dev/null +++ b/analysis/dependency_analysis.py @@ -0,0 +1,388 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Dependency and coupling analysis for TypeScript/JavaScript modules. 
+
+Analyzes import/export patterns to identify:
+- Module dependencies
+- Circular dependencies
+- Highly coupled modules (too many imports)
+- Fan-in/fan-out metrics
+"""
+
+import pathlib
+import re
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Set, Tuple
+
+import pathspec
+
+
+@dataclass
+class ModuleInfo:
+    """Information about a single module's dependencies."""
+
+    path: str
+    imports: Set[str] = field(default_factory=set)  # Modules this imports
+    imported_by: Set[str] = field(default_factory=set)  # Modules that import this
+
+    @property
+    def fan_out(self) -> int:
+        """Number of modules this depends on (outgoing edges)."""
+        return len(self.imports)
+
+    @property
+    def fan_in(self) -> int:
+        """Number of modules that depend on this (incoming edges)."""
+        return len(self.imported_by)
+
+    @property
+    def instability(self) -> float:
+        """Instability metric: fan_out / (fan_in + fan_out).
+
+        0 = maximally stable (depends on nothing; only has dependents)
+        1 = maximally unstable (only has dependencies; nothing depends on it)
+        """
+        total = self.fan_in + self.fan_out
+        if total == 0:
+            return 0.0
+        return self.fan_out / total
+
+    def to_dict(self) -> dict:
+        return {
+            "path": self.path,
+            "fan_out": self.fan_out,
+            "fan_in": self.fan_in,
+            "instability": round(self.instability, 3),
+            "imports": sorted(self.imports),
+            "imported_by": sorted(self.imported_by),
+        }
+
+
+def load_gitignore(repo_root: pathlib.Path) -> Optional[pathspec.PathSpec]:
+    """Load .gitignore patterns."""
+    gitignore_path = repo_root / ".gitignore"
+    if not gitignore_path.exists():
+        return None
+
+    with open(gitignore_path, "r", encoding="utf-8") as f:
+        patterns = f.read().splitlines()
+
+    return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
+
+
+def should_analyze_file(
+    filepath: pathlib.Path,
+    repo_root: pathlib.Path,
+    gitignore: Optional[pathspec.PathSpec],
+) -> bool:
+    """Check if a file should be analyzed."""
+    rel_path = filepath.relative_to(repo_root).as_posix()
+
+    # Skip common non-source directories
+    skip_dirs = {
+        "node_modules",
+        "dist",
+        "out",
+        ".venv",
+        "__pycache__",
+        ".git",
+        ".vscode-test",
+    }
+    for part in filepath.parts:
+        if part in skip_dirs:
+            return False
+
+    # Skip if matched by gitignore
+    if gitignore and gitignore.match_file(rel_path):
+        return False
+
+    # Skip test and mock files for dependency analysis
+    if any(x in rel_path for x in ["/test/", "/mocks/", ".test.", ".spec."]):
+        return False
+
+    return True
+
+
+def extract_imports_typescript(
+    filepath: pathlib.Path, repo_root: pathlib.Path
+) -> Set[str]:
+    """Extract import paths from a TypeScript/JavaScript file."""
+    try:
+        content = filepath.read_text(encoding="utf-8")
+    except (UnicodeDecodeError, OSError):
+        return set()
+
+    imports = set()
+
+    # Match various import patterns:
+    # import { x } from './path'
+    # import x from './path'
+    # import * as x from './path'
+    # import './path'
+    # const x = require('./path')
+    import_patterns = [
+        r'import\s+(?:.*?\s+from\s+)?[\'"]([^\'"]+)[\'"]',
+        r'require\s*\(\s*[\'"]([^\'"]+)[\'"]\s*\)',
+        r'import\s*\(\s*[\'"]([^\'"]+)[\'"]\s*\)',  # dynamic import
+    ]
+
+    for pattern in import_patterns:
+        for match in re.finditer(pattern, content):
+            import_path = match.group(1)
+
+            # Only track relative imports (local modules)
+            if import_path.startswith("."):
+                # Resolve relative path
+                resolved = resolve_import_path(filepath, import_path, repo_root)
+                if resolved:
+                    imports.add(resolved)
+
+    return
imports + + +def resolve_import_path( + from_file: pathlib.Path, import_path: str, repo_root: pathlib.Path +) -> Optional[str]: + """Resolve a relative import path to a workspace-relative path.""" + from_dir = from_file.parent + + # Handle the import path + # Remove ./ or ../ prefixes and resolve + resolved = (from_dir / import_path).resolve() + + # Try common extensions + candidates = [ + resolved, + resolved.with_suffix(".ts"), + resolved.with_suffix(".tsx"), + resolved.with_suffix(".js"), + resolved / "index.ts", + resolved / "index.tsx", + resolved / "index.js", + ] + + for candidate in candidates: + if candidate.exists() and candidate.is_file(): + try: + return candidate.relative_to(repo_root).as_posix() + except ValueError: + return None + + return None + + +def build_dependency_graph(repo_root: pathlib.Path) -> Dict[str, ModuleInfo]: + """Build a dependency graph of all TypeScript modules.""" + gitignore = load_gitignore(repo_root) + modules: Dict[str, ModuleInfo] = {} + + # Find all TypeScript/JavaScript files + extensions = [".ts", ".tsx", ".js", ".jsx"] + source_files = [] + + for ext in extensions: + for filepath in repo_root.rglob(f"*{ext}"): + if should_analyze_file(filepath, repo_root, gitignore): + source_files.append(filepath) + + # First pass: extract imports + for filepath in source_files: + rel_path = filepath.relative_to(repo_root).as_posix() + imports = extract_imports_typescript(filepath, repo_root) + + modules[rel_path] = ModuleInfo(path=rel_path, imports=imports) + + # Second pass: compute imported_by (reverse dependencies) + for module_path, module_info in modules.items(): + for imported_path in module_info.imports: + if imported_path in modules: + modules[imported_path].imported_by.add(module_path) + + return modules + + +def find_circular_dependencies(modules: Dict[str, ModuleInfo]) -> List[List[str]]: + """Find circular dependency chains using DFS.""" + cycles = [] + visited = set() + rec_stack = set() + + def dfs(node: str, path: List[str]) -> None: + if node in rec_stack: + # Found cycle - extract it + cycle_start = path.index(node) + cycle = path[cycle_start:] + [node] + # Normalize cycle (start from smallest element) + min_idx = cycle.index(min(cycle[:-1])) # Exclude last element (duplicate) + normalized = cycle[min_idx:-1] + cycle[:min_idx] + [cycle[min_idx]] + if normalized not in cycles: + cycles.append(normalized) + return + + if node in visited: + return + + visited.add(node) + rec_stack.add(node) + + module = modules.get(node) + if module: + for imported in module.imports: + if imported in modules: + dfs(imported, path + [node]) + + rec_stack.remove(node) + + for module_path in modules: + if module_path not in visited: + dfs(module_path, []) + + return cycles + + +def find_highly_coupled_modules( + modules: Dict[str, ModuleInfo], threshold: int = 10 +) -> List[dict]: + """Find modules with too many dependencies (high fan-out).""" + highly_coupled = [] + + for module_path, module_info in modules.items(): + if module_info.fan_out > threshold: + highly_coupled.append( + { + "path": module_path, + "fan_out": module_info.fan_out, + "imports": sorted(module_info.imports), + } + ) + + highly_coupled.sort(key=lambda m: m["fan_out"], reverse=True) + return highly_coupled + + +def find_hub_modules(modules: Dict[str, ModuleInfo], threshold: int = 10) -> List[dict]: + """Find 'hub' modules with high fan-in (many dependents). + + These are candidates for careful change management - changes here affect many modules. 
+ """ + hubs = [] + + for module_path, module_info in modules.items(): + if module_info.fan_in > threshold: + hubs.append( + { + "path": module_path, + "fan_in": module_info.fan_in, + "imported_by": sorted(module_info.imported_by), + } + ) + + hubs.sort(key=lambda m: m["fan_in"], reverse=True) + return hubs + + +def compute_layer_violations(modules: Dict[str, ModuleInfo]) -> List[dict]: + """Detect potential architectural layer violations. + + Common pattern: lower layers should not import from higher layers. + """ + # Define typical layer order (lower index = lower layer) + layer_order = [ + "common", + "utils", + "api", + "models", + "services", + "features", + "views", + "managers", + ] + + def get_layer(path: str) -> int: + """Get the layer index for a module path.""" + parts = path.lower().split("/") + for i, layer in enumerate(layer_order): + if layer in parts: + return i + return len(layer_order) # Unknown = highest layer + + violations = [] + for module_path, module_info in modules.items(): + module_layer = get_layer(module_path) + + for imported_path in module_info.imports: + imported_layer = get_layer(imported_path) + + # Lower layer importing from higher layer is a violation + if module_layer < imported_layer: + violations.append( + { + "from": module_path, + "from_layer": layer_order[module_layer] + if module_layer < len(layer_order) + else "unknown", + "imports": imported_path, + "imports_layer": layer_order[imported_layer] + if imported_layer < len(layer_order) + else "unknown", + } + ) + + return violations + + +def analyze_dependencies(repo_root: pathlib.Path) -> dict: + """Run complete dependency analysis. + + Returns: + Dictionary with dependency metrics + """ + modules = build_dependency_graph(repo_root) + + # Compute metrics + circular_deps = find_circular_dependencies(modules) + highly_coupled = find_highly_coupled_modules(modules) + hub_modules = find_hub_modules(modules) + layer_violations = compute_layer_violations(modules) + + # Summary statistics + fan_outs = [m.fan_out for m in modules.values()] + fan_ins = [m.fan_in for m in modules.values()] + + summary = { + "total_modules": len(modules), + "total_dependencies": sum(m.fan_out for m in modules.values()), + "avg_fan_out": round(sum(fan_outs) / len(fan_outs), 2) if fan_outs else 0, + "avg_fan_in": round(sum(fan_ins) / len(fan_ins), 2) if fan_ins else 0, + "max_fan_out": max(fan_outs) if fan_outs else 0, + "max_fan_in": max(fan_ins) if fan_ins else 0, + "circular_dependency_count": len(circular_deps), + "highly_coupled_count": len(highly_coupled), + "hub_module_count": len(hub_modules), + } + + # Module details (top by instability) + module_details = sorted( + [m.to_dict() for m in modules.values()], + key=lambda m: (m["instability"], m["fan_out"]), + reverse=True, + )[:30] + + return { + "summary": summary, + "circular_dependencies": circular_deps[:20], # Top 20 + "highly_coupled_modules": highly_coupled[:20], + "hub_modules": hub_modules[:20], + "layer_violations": layer_violations[:30], + "top_modules_by_instability": module_details, + } + + +if __name__ == "__main__": + import json + + repo = pathlib.Path(__file__).parent.parent + result = analyze_dependencies(repo) + print(json.dumps(result, indent=2)) diff --git a/analysis/git_analysis.py b/analysis/git_analysis.py new file mode 100644 index 00000000..8d5304e6 --- /dev/null +++ b/analysis/git_analysis.py @@ -0,0 +1,391 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +""" +Git-based code analysis inspired by "Your Code as a Crime Scene" and "Software Design X-Rays". + +Extracts metrics from git history: +- Change frequency (hotspots) +- Code churn (lines added/removed) +- Temporal coupling (files that change together) +- Author diversity / bus factor +- File age analysis +""" + +import pathlib +import subprocess +from collections import defaultdict +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Dict, List, Optional, Set, Tuple + +# Limit history analysis for performance +MAX_COMMITS = 1000 +DAYS_OF_HISTORY = 365 + + +@dataclass +class FileStats: + """Statistics for a single file from git history.""" + + path: str + change_count: int = 0 + lines_added: int = 0 + lines_removed: int = 0 + authors: Set[str] = field(default_factory=set) + last_modified: Optional[datetime] = None + first_seen: Optional[datetime] = None + + @property + def churn(self) -> int: + """Total code churn (additions + deletions).""" + return self.lines_added + self.lines_removed + + @property + def author_count(self) -> int: + """Number of unique authors who touched this file.""" + return len(self.authors) + + def to_dict(self) -> dict: + """Convert to JSON-serializable dictionary.""" + return { + "path": self.path, + "change_count": self.change_count, + "lines_added": self.lines_added, + "lines_removed": self.lines_removed, + "churn": self.churn, + "author_count": self.author_count, + "authors": sorted(self.authors), + "last_modified": self.last_modified.isoformat() + if self.last_modified + else None, + "first_seen": self.first_seen.isoformat() if self.first_seen else None, + "age_days": (datetime.now(timezone.utc) - self.last_modified).days + if self.last_modified + else None, + } + + +@dataclass +class TemporalCoupling: + """Represents files that frequently change together.""" + + file1: str + file2: str + coupled_commits: int + coupling_ratio: float # coupled_commits / min(file1_changes, file2_changes) + + def to_dict(self) -> dict: + return { + "file1": self.file1, + "file2": self.file2, + "coupled_commits": self.coupled_commits, + "coupling_ratio": round(self.coupling_ratio, 3), + } + + +def run_git_command(args: List[str], cwd: pathlib.Path) -> str: + """Run a git command and return stdout.""" + result = subprocess.run( + ["git"] + args, + cwd=cwd, + capture_output=True, + text=True, + check=True, + ) + return result.stdout + + +def get_tracked_files(repo_root: pathlib.Path) -> Set[str]: + """Get set of currently tracked files in the repository.""" + output = run_git_command(["ls-files"], repo_root) + return set(output.strip().split("\n")) if output.strip() else set() + + +def analyze_git_log(repo_root: pathlib.Path) -> Dict[str, FileStats]: + """ + Parse git log to extract file statistics. 
+ + Uses git log --numstat for efficient extraction of: + - Change frequency per file + - Lines added/removed per file + - Authors per file + - Timestamps + """ + file_stats: Dict[str, FileStats] = defaultdict(lambda: FileStats(path="")) + + # Get commits from last N days, limited to MAX_COMMITS + since_date = datetime.now(timezone.utc).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + since_date = since_date.replace( + year=since_date.year - 1 if DAYS_OF_HISTORY >= 365 else since_date.year + ) + + try: + log_output = run_git_command( + [ + "log", + f"--since={DAYS_OF_HISTORY} days ago", + f"-n{MAX_COMMITS}", + "--numstat", + "--format=%H|%aI|%aN", + "--no-merges", + ], + repo_root, + ) + except subprocess.CalledProcessError: + return {} + + current_commit_info: Optional[Tuple[str, datetime, str]] = None + tracked_files = get_tracked_files(repo_root) + + for line in log_output.split("\n"): + line = line.strip() + if not line: + continue + + # Check if this is a commit header line + if "|" in line and line.count("|") == 2: + parts = line.split("|") + if len(parts) == 3: + commit_hash, date_str, author = parts + try: + commit_date = datetime.fromisoformat( + date_str.replace("Z", "+00:00") + ) + current_commit_info = (commit_hash, commit_date, author) + except ValueError: + current_commit_info = None + continue + + # Parse numstat line: "added\tremoved\tfilepath" + if current_commit_info and "\t" in line: + parts = line.split("\t") + if len(parts) >= 3: + added, removed, filepath = parts[0], parts[1], parts[2] + + # Skip binary files (shown as "-") + if added == "-" or removed == "-": + continue + + # Only track files that currently exist + if filepath not in tracked_files: + continue + + # Skip test files and generated files for hotspot analysis + if _should_skip_file(filepath): + continue + + try: + lines_added = int(added) + lines_removed = int(removed) + except ValueError: + continue + + _, commit_date, author = current_commit_info + + if filepath not in file_stats: + file_stats[filepath] = FileStats(path=filepath) + + stats = file_stats[filepath] + stats.change_count += 1 + stats.lines_added += lines_added + stats.lines_removed += lines_removed + stats.authors.add(author) + + # Track dates + if stats.last_modified is None or commit_date > stats.last_modified: + stats.last_modified = commit_date + if stats.first_seen is None or commit_date < stats.first_seen: + stats.first_seen = commit_date + + return dict(file_stats) + + +def _should_skip_file(filepath: str) -> bool: + """Check if file should be excluded from analysis.""" + skip_patterns = [ + "node_modules/", + "dist/", + ".vscode-test/", + "__pycache__/", + ".git/", + "package-lock.json", + ".vsix", + ] + return any(pattern in filepath for pattern in skip_patterns) + + +def analyze_temporal_coupling( + repo_root: pathlib.Path, min_coupling: int = 3, min_ratio: float = 0.3 +) -> List[TemporalCoupling]: + """ + Find files that frequently change together (temporal coupling). 
+ + High temporal coupling can indicate: + - Hidden dependencies + - Copy-paste code + - Features spread across files + + Args: + repo_root: Repository root path + min_coupling: Minimum number of co-changes to report + min_ratio: Minimum coupling ratio (0.0 to 1.0) + """ + # Track which files changed in each commit + commit_files: Dict[str, Set[str]] = defaultdict(set) + file_change_count: Dict[str, int] = defaultdict(int) + + try: + log_output = run_git_command( + [ + "log", + f"--since={DAYS_OF_HISTORY} days ago", + f"-n{MAX_COMMITS}", + "--name-only", + "--format=%H", + "--no-merges", + ], + repo_root, + ) + except subprocess.CalledProcessError: + return [] + + tracked_files = get_tracked_files(repo_root) + current_commit: Optional[str] = None + + for line in log_output.split("\n"): + line = line.strip() + if not line: + continue + + # Commit hash is 40 hex characters + if len(line) == 40 and all(c in "0123456789abcdef" for c in line): + current_commit = line + continue + + if current_commit and line in tracked_files and not _should_skip_file(line): + commit_files[current_commit].add(line) + file_change_count[line] += 1 + + # Calculate coupling between file pairs + coupling_count: Dict[Tuple[str, str], int] = defaultdict(int) + + for files in commit_files.values(): + file_list = sorted(files) + for i, file1 in enumerate(file_list): + for file2 in file_list[i + 1 :]: + coupling_count[(file1, file2)] += 1 + + # Filter and create coupling objects + couplings: List[TemporalCoupling] = [] + for (file1, file2), count in coupling_count.items(): + if count < min_coupling: + continue + + # Calculate coupling ratio relative to less-changed file + min_changes = min(file_change_count[file1], file_change_count[file2]) + ratio = count / min_changes if min_changes > 0 else 0 + + if ratio >= min_ratio: + couplings.append( + TemporalCoupling( + file1=file1, + file2=file2, + coupled_commits=count, + coupling_ratio=ratio, + ) + ) + + # Sort by coupling strength + couplings.sort(key=lambda c: (c.coupling_ratio, c.coupled_commits), reverse=True) + return couplings[:50] # Top 50 couplings + + +def calculate_bus_factor(file_stats: Dict[str, FileStats]) -> dict: + """ + Calculate bus factor metrics. + + Bus factor = minimum number of authors who need to leave + before knowledge is lost. + + Low bus factor (1-2) indicates knowledge silos. + """ + # Overall project bus factor + all_authors: Set[str] = set() + single_author_files: List[str] = [] + + for stats in file_stats.values(): + all_authors.update(stats.authors) + if stats.author_count == 1: + single_author_files.append(stats.path) + + # Files with low bus factor (knowledge silos) + knowledge_silos = [ + {"path": stats.path, "sole_author": list(stats.authors)[0]} + for stats in file_stats.values() + if stats.author_count == 1 and stats.change_count >= 3 + ] + + # Sort by change count (more changes = higher risk) + knowledge_silos.sort(key=lambda x: file_stats[x["path"]].change_count, reverse=True) + + return { + "total_authors": len(all_authors), + "single_author_file_count": len(single_author_files), + "single_author_file_ratio": round(len(single_author_files) / len(file_stats), 3) + if file_stats + else 0, + "knowledge_silos": knowledge_silos[:20], # Top 20 at-risk files + } + + +def get_hotspots(file_stats: Dict[str, FileStats], top_n: int = 30) -> List[dict]: + """ + Identify hotspots - files that change frequently. 
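+    Ranked by change count, with total churn as the tie-breaker.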
+ + Hotspots are prime candidates for: + - Code review focus + - Refactoring + - Test coverage + """ + sorted_files = sorted( + file_stats.values(), + key=lambda s: (s.change_count, s.churn), + reverse=True, + ) + return [f.to_dict() for f in sorted_files[:top_n]] + + +def analyze_repository(repo_root: pathlib.Path) -> dict: + """ + Run complete git-based analysis on a repository. + + Returns a dictionary with all git metrics. + """ + file_stats = analyze_git_log(repo_root) + temporal_coupling = analyze_temporal_coupling(repo_root) + bus_factor = calculate_bus_factor(file_stats) + hotspots = get_hotspots(file_stats) + + return { + "hotspots": hotspots, + "temporal_coupling": [c.to_dict() for c in temporal_coupling], + "bus_factor": bus_factor, + "summary": { + "files_analyzed": len(file_stats), + "total_changes": sum(s.change_count for s in file_stats.values()), + "total_churn": sum(s.churn for s in file_stats.values()), + "history_days": DAYS_OF_HISTORY, + "max_commits": MAX_COMMITS, + }, + } + + +if __name__ == "__main__": + import json + + repo = pathlib.Path(__file__).parent.parent + results = analyze_repository(repo) + print(json.dumps(results, indent=2)) diff --git a/analysis/pyproject.toml b/analysis/pyproject.toml new file mode 100644 index 00000000..74ba285d --- /dev/null +++ b/analysis/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "vscode-python-environments-analysis" +version = "0.1.0" +description = "Code health and technical debt analysis tools" +requires-python = ">=3.9" +dependencies = [ + "gitpython>=3.1.0", + "radon>=6.0.0", + "pathspec>=0.11.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.0.0", +] + +[tool.uv] +dev-dependencies = [ + "pytest>=7.0.0", +] diff --git a/analysis/snapshot.py b/analysis/snapshot.py new file mode 100644 index 00000000..3f35c911 --- /dev/null +++ b/analysis/snapshot.py @@ -0,0 +1,288 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Main orchestrator for code health snapshot generation. + +Aggregates results from all analysis modules into a single JSON snapshot +suitable for tracking technical debt over time. 
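+
+Run from the analysis/ directory so the sibling analysis modules import correctly.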
+""" + +import argparse +import json +import pathlib +import subprocess +import sys +from datetime import datetime, timezone +from typing import Optional + +# Import analysis modules +from complexity_analysis import analyze_complexity +from debt_indicators import analyze_debt +from dependency_analysis import analyze_dependencies +from git_analysis import analyze_repository as analyze_git + +# Snapshot schema version - increment when breaking changes are made +SCHEMA_VERSION = "1.0.0" + + +def get_git_info(repo_root: pathlib.Path) -> dict: + """Get current git commit information.""" + try: + sha = subprocess.run( + ["git", "rev-parse", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + short_sha = subprocess.run( + ["git", "rev-parse", "--short", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + branch = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + # Get commit message + message = subprocess.run( + ["git", "log", "-1", "--pretty=%s"], + cwd=repo_root, + capture_output=True, + text=True, + check=True, + ).stdout.strip() + + return { + "sha": sha, + "short_sha": short_sha, + "branch": branch, + "message": message[:200], # Truncate long messages + } + except subprocess.CalledProcessError: + return { + "sha": "unknown", + "short_sha": "unknown", + "branch": "unknown", + "message": "", + } + + +def compute_priority_hotspots(git_data: dict, complexity_data: dict) -> list: + """Compute priority hotspots by combining change frequency with complexity. + + Files that are both frequently changed AND complex are the highest priority + for refactoring attention (the "X-Ray" approach from Software Design X-Rays). 
+ """ + # Build complexity lookup by path + complexity_by_path = {} + for lang_files in complexity_data.get("by_language", {}).values(): + for file_data in lang_files: + complexity_by_path[file_data["path"]] = file_data + + priority_hotspots = [] + for hotspot in git_data.get("hotspots", []): + path = hotspot["path"] + complexity_info = complexity_by_path.get(path, {}) + + # Priority score = change_count * max_complexity + # Higher score = more urgent attention needed + change_count = hotspot.get("change_count", 0) + max_complexity = complexity_info.get("max_complexity", 1) + priority_score = change_count * max_complexity + + priority_hotspots.append( + { + "path": path, + "change_count": change_count, + "churn": hotspot.get("churn", 0), + "max_complexity": max_complexity, + "avg_complexity": complexity_info.get("avg_complexity", 0), + "code_lines": complexity_info.get("code_lines", 0), + "priority_score": priority_score, + } + ) + + # Sort by priority score descending + priority_hotspots.sort(key=lambda x: x["priority_score"], reverse=True) + return priority_hotspots[:20] # Top 20 + + +def compute_summary_metrics( + git_data: dict, + complexity_data: dict, + debt_data: dict, + dependency_data: dict, +) -> dict: + """Compute high-level summary metrics for dashboard/trending.""" + return { + # Change activity + "files_with_changes": git_data.get("summary", {}).get("files_analyzed", 0), + "total_changes": git_data.get("summary", {}).get("total_changes", 0), + "total_churn": git_data.get("summary", {}).get("total_churn", 0), + # Complexity + "total_files_analyzed": complexity_data.get("summary", {}).get( + "total_files", 0 + ), + "total_functions": complexity_data.get("summary", {}).get("total_functions", 0), + "high_complexity_files": len( + complexity_data.get("summary", {}).get("files_with_high_complexity", []) + ), + "avg_file_complexity": complexity_data.get("summary", {}).get( + "avg_file_complexity", 0 + ), + # Debt markers + "todo_count": debt_data.get("debt_markers", {}) + .get("summary", {}) + .get("TODO", 0), + "fixme_count": debt_data.get("debt_markers", {}) + .get("summary", {}) + .get("FIXME", 0), + "total_debt_markers": debt_data.get("debt_markers", {}).get("total_count", 0), + "large_file_count": debt_data.get("large_files", {}).get("count", 0), + "long_function_count": debt_data.get("long_functions", {}).get("count", 0), + # Dependencies + "module_count": dependency_data.get("summary", {}).get("total_modules", 0), + "circular_dependency_count": dependency_data.get("summary", {}).get( + "circular_dependency_count", 0 + ), + "highly_coupled_module_count": dependency_data.get("summary", {}).get( + "highly_coupled_count", 0 + ), + # Bus factor + "single_author_file_ratio": git_data.get("bus_factor", {}).get( + "single_author_file_ratio", 0 + ), + "total_authors": git_data.get("bus_factor", {}).get("total_authors", 0), + } + + +def generate_snapshot( + repo_root: pathlib.Path, output_path: Optional[pathlib.Path] = None +) -> dict: + """Generate a complete code health snapshot. 
+ + Args: + repo_root: Path to repository root + output_path: Optional path to write JSON output + + Returns: + Complete snapshot dictionary + """ + print("Starting code health analysis...") + + # Gather metadata + git_info = get_git_info(repo_root) + timestamp = datetime.now(timezone.utc).isoformat() + + print(f" Commit: {git_info['short_sha']} ({git_info['branch']})") + print(f" Time: {timestamp}") + + # Run all analyses + print(" Analyzing git history...") + git_data = analyze_git(repo_root) + + print(" Analyzing code complexity...") + complexity_data = analyze_complexity(repo_root) + + print(" Scanning for debt indicators...") + debt_data = analyze_debt(repo_root) + + print(" Analyzing dependencies...") + dependency_data = analyze_dependencies(repo_root) + + # Compute derived metrics + print(" Computing priority hotspots...") + priority_hotspots = compute_priority_hotspots(git_data, complexity_data) + + print(" Generating summary metrics...") + summary = compute_summary_metrics( + git_data, complexity_data, debt_data, dependency_data + ) + + # Assemble snapshot + snapshot = { + "metadata": { + "schema_version": SCHEMA_VERSION, + "generated_at": timestamp, + "git": git_info, + }, + "summary": summary, + "priority_hotspots": priority_hotspots, + "git_analysis": git_data, + "complexity": complexity_data, + "debt_indicators": debt_data, + "dependencies": dependency_data, + } + + # Write output if path specified + if output_path: + output_path.parent.mkdir(parents=True, exist_ok=True) + with open(output_path, "w", encoding="utf-8") as f: + json.dump(snapshot, f, indent=2, ensure_ascii=False) + print(f" Snapshot written to: {output_path}") + + print("Analysis complete!") + return snapshot + + +def build_arg_parser() -> argparse.ArgumentParser: + """Build argument parser.""" + parser = argparse.ArgumentParser( + description="Generate code health snapshot for technical debt tracking." 
+ ) + parser.add_argument( + "--output", + "-o", + type=pathlib.Path, + default=pathlib.Path("analysis-snapshot.json"), + help="Output path for snapshot JSON (default: analysis-snapshot.json)", + ) + parser.add_argument( + "--repo-root", + type=pathlib.Path, + default=None, + help="Repository root path (default: parent of this script)", + ) + parser.add_argument( + "--pretty", + action="store_true", + help="Pretty-print JSON output to stdout", + ) + return parser + + +def main() -> int: + """Main entry point.""" + parser = build_arg_parser() + args = parser.parse_args() + + repo_root = args.repo_root or pathlib.Path(__file__).parent.parent + repo_root = repo_root.resolve() + + if not (repo_root / ".git").exists(): + print(f"Error: {repo_root} is not a git repository", file=sys.stderr) + return 1 + + try: + snapshot = generate_snapshot(repo_root, args.output) + + if args.pretty: + print(json.dumps(snapshot, indent=2, ensure_ascii=False)) + + return 0 + except Exception as e: + print(f"Error generating snapshot: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main()) From e6740571d941ca3830cec292014771a6e02a4aad Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 11:18:25 -0800 Subject: [PATCH 2/7] main agents --- .github/agents/maintainer.agent.md | 610 ++++++++++++++++++++++++++ .github/agents/reviewer.agent.md | 678 +++++++++++++++++++++++++++++ 2 files changed, 1288 insertions(+) create mode 100644 .github/agents/maintainer.agent.md create mode 100644 .github/agents/reviewer.agent.md diff --git a/.github/agents/maintainer.agent.md b/.github/agents/maintainer.agent.md new file mode 100644 index 00000000..88275279 --- /dev/null +++ b/.github/agents/maintainer.agent.md @@ -0,0 +1,610 @@ +--- +name: maintainer +description: 'Project maintainer for vscode-python-environments. Drives planning from codebase snapshots and open issues, implements TypeScript/Node.js changes, self-reviews via Reviewer agent, and manages the full PR lifecycle with Copilot review.' +tools: + [ + 'vscode/getProjectSetupInfo', + 'vscode/runCommand', + 'vscode/askQuestions', + 'execute/getTerminalOutput', + 'execute/awaitTerminal', + 'execute/killTerminal', + 'execute/createAndRunTask', + 'execute/testFailure', + 'execute/runInTerminal', + 'read/terminalSelection', + 'read/terminalLastCommand', + 'read/problems', + 'read/readFile', + 'agent', + 'github/add_comment_to_pending_review', + 'github/add_issue_comment', + 'github/create_pull_request', + 'github/get_label', + 'github/issue_read', + 'github/issue_write', + 'github/list_branches', + 'github/list_commits', + 'github/list_issues', + 'github/list_pull_requests', + 'github/merge_pull_request', + 'github/pull_request_read', + 'github/pull_request_review_write', + 'github/request_copilot_review', + 'github/search_issues', + 'github/search_pull_requests', + 'github/update_pull_request', + 'github/update_pull_request_branch', + 'edit/createDirectory', + 'edit/createFile', + 'edit/editFiles', + 'search', + 'web', + 'todo', + ] +--- + +# Prime Directive + +**The codebase must always be shippable. Every merge leaves the repo in a better state than before.** + +# Project Context + +**vscode-python-environments** — A VS Code extension providing a unified Python environment experience. Manages environment discovery, creation, selection, terminal activation, and package management across multiple Python managers. 
+ +**Stack:** + +- **Language:** TypeScript (Node.js, VS Code Extension API) +- **Build:** Webpack + TypeScript compiler +- **Test Framework:** Mocha + Sinon for unit tests, @vscode/test-cli for E2E/integration +- **Environment Managers Supported:** venv, conda, poetry, pipenv, pyenv, pixi, uv +- **Native Component:** PET (Python Environment Tools) — Rust-based locator server for fast environment discovery + +**Key Architecture:** + +- `src/managers/` — Environment manager implementations (one folder per manager) +- `src/features/` — Core features (terminal activation, settings, views, execution) +- `src/common/` — Shared utilities, APIs, and error handling +- `src/api.ts` — Public API surface for other extensions +- `analysis/` — Python scripts for codebase health snapshots + +**Critical Guidelines (from `.github/instructions/generic.instructions.md`):** + +- **Cross-platform paths:** Use `path.join()` or `path.resolve()`, never hardcode POSIX paths +- **Settings precedence:** Always pass scope to `getConfiguration()`, use `inspect()` for explicit values +- **Localization:** All user-facing messages use `l10n.t()`, logs don't need localization +- **Logging:** Use `traceLog`/`traceVerbose`, never `console.log` + +**CLI tools:** + +- `npm` for build, test, lint +- `gh` CLI for GitHub interactions +- `python` for running analysis scripts + +--- + +# Workflow Overview + +``` +CI (snapshot) → Planning → Development → Review → Merge +``` + +All work follows this loop. No shortcuts. + +--- + +# Planning Phase + +## When asked "What should we work on next?" + +1. **Gather context:** + - Check open GitHub issues (`github/list_issues`, `github/search_issues`) + - Review labeled issues (bugs, enhancements, manager-specific) + - Generate or download the latest codebase snapshot (see Snapshot Usage below) + - Check open PRs for related work + +2. **Analyze and prioritize:** + - Cross-reference open issues against snapshot hotspots and debt indicators + - Identify bugs vs enhancements vs chores + - Consider cross-platform impact (changes affecting Windows, macOS, Linux) + - Factor in manager dependencies (changes to `common/` affect many managers) + - Use snapshot data to identify refactoring opportunities + +3. **Present a curated priority list:** + - Show 3–5 actionable work items ranked by impact and readiness + - For each item: brief description, affected components, estimated complexity + - Recommend the top pick with reasoning + +4. **User picks a work item** → proceed to Development Phase + +### Snapshot Usage + +The codebase snapshot is generated by the analysis scripts in `analysis/`. Run it locally to get current metrics. + +**To generate a snapshot:** + +```powershell +# Ensure Python is available +cd analysis +python snapshot.py --output ./analysis-snapshot.json +``` + +**Snapshot structure** (`analysis-snapshot.json`): + +```json +{ + "metadata": { + "schema_version": "1.0.0", + "generated_at": "ISO timestamp", + "git": { "sha": "...", "branch": "...", "message": "..." } + }, + "summary": { + "files_with_changes": N, + "total_changes": N, + "total_churn": N, + "high_complexity_files": N, + "todo_count": N, + "fixme_count": N, + "circular_dependency_count": N, + "single_author_file_ratio": 0.0 + }, + "priority_hotspots": [ + { "path": "...", "change_count": N, "churn": N, "max_complexity": N, "priority_score": N } + ], + "git_analysis": { + "hotspots": [...], + "temporal_coupling": [...], + "bus_factor": { "total_authors": N, "knowledge_silos": [...] 
} + }, + "complexity": { + "by_language": { "typescript": [...], "python": [...] }, + "high_complexity_functions": [...] + }, + "debt_indicators": { + "debt_markers": { "by_type": { "TODO": [...], "FIXME": [...] } }, + "large_files": { "files": [...] }, + "long_functions": { "functions": [...] } + }, + "dependencies": { + "circular_dependencies": [...], + "highly_coupled_modules": [...], + "hub_modules": [...], + "layer_violations": [...] + } +} +``` + +**How to interpret:** + +- **priority_hotspots** (sorted by priority_score desc): Files that change often AND are complex — prime refactoring candidates +- **temporal_coupling**: File pairs with high coupling ratio indicate hidden dependencies +- **knowledge_silos**: Files with single author — bus factor risk +- **debt_markers.by_type.FIXME**: High-priority technical debt +- **circular_dependencies**: Architectural issues requiring attention +- **hub_modules**: High fan-in modules — changes here affect many files + +--- + +# Development Phase + +## 1. Create an Issue + +Every piece of work starts with a GitHub issue — no exceptions. + +- Search for duplicates first (`github/search_issues`) +- Create the issue with a clear title, description, and labels +- Link to relevant manager or feature area if applicable + +## 2. Create a Feature Branch + +```powershell +git checkout main; git pull +git checkout -b feature/issue-N # or bug/issue-N, chore/issue-N +``` + +Use the issue number in the branch name for traceability. + +## 3. Implement Changes + +- Follow TypeScript conventions and the project's patterns +- Write/update tests alongside code +- Keep changes focused — one issue per branch +- Follow the critical guidelines from `.github/instructions/generic.instructions.md` + +### Code Conventions + +- **Cross-platform paths:** Use `path.join()`, `path.resolve()`, never concatenate with `/` +- **Settings:** Always pass scope to `getConfiguration()`, use `inspect()` for explicit value checks +- **Localization:** `l10n.t()` for user-facing messages, no localization for logs +- **Logging:** Use `traceLog`, `traceVerbose` — never `console.log` +- **API types:** Handle union types (`Uri | string`), use `env.envId.id` not `env.id` +- **Thread safety:** Be careful with async operations, avoid race conditions +- **Tests:** Follow patterns in `src/test/`, use mocks from `src/test/mocks/` + +### High-Risk Areas (Extra Scrutiny Required) + +Based on past issues, these areas have high bug density: + +| File Area | Risk | Common Issues | +| ------------------------------------------- | --------------------- | -------------------------------------------- | +| `src/managers/common/nativePythonFinder.ts` | Environment discovery | Type guards, cache issues, resource leaks | +| `src/features/terminal/` | Terminal activation | Timing issues, shell detection, reveal logic | +| `src/managers/poetry/` | Poetry manager | Cache-dir placeholder, env var precedence | +| `src/managers/pyenv/` | PyEnv manager | Windows path calculation | +| `src/features/settings/` | Settings | Precedence, inspect() vs get() | +| `src/features/interpreterSelection.ts` | Interpreter selection | Persistence, multi-workspace | + +## 4. Self-Review (MANDATORY) + +**Before every commit, invoke the Reviewer agent as a sub-agent.** + +This is non-negotiable. Every code change must pass through the Reviewer agent before being committed. + +### How to invoke: + +Run the **Reviewer** agent (`.github/agents/reviewer.agent.md`) as a sub-agent with instructions to: + +1. 
Get the list of changed files (`git diff --name-only`) +2. Read and review each changed file +3. Report findings in the standard format (Critical / Important / Suggestions / Questions) + +### Handling Reviewer findings: + +- **Critical:** Must fix before committing. Fix the issue, then re-run the Reviewer agent. +- **Important:** Should fix before committing. Fix and re-run. +- **Suggestions:** Apply if reasonable, note if deferred. +- **Questions:** Answer them — if you can't justify the decision, reconsider it. + +**Loop until the Reviewer agent returns clean or only has minor suggestions.** + +## 5. Pre-Commit Checks (REQUIRED) + +Before committing, always run: + +```powershell +# Lint check (required) +npm run lint + +# Type check (required) +npm run compile-tests + +# Run unit tests (required) +npm run unittest +``` + +If lint or tests fail, fix them before committing. + +## 6. Commit + +Format: `[type]: brief description (Fixes #N)` + +Types: `feat`, `fix`, `chore`, `docs`, `refactor`, `test` + +```powershell +git add -A +git commit -m "feat: add pyenv-win support for Windows (Fixes #42)" +``` + +## 7. Push & Create PR + +```powershell +git push -u origin feature/issue-N +``` + +Create a PR via `github/create_pull_request`: + +- **Title:** Same as commit message (or summarized if multiple commits) +- **Body:** Keep it concise: + - 1–2 sentence summary of what and why + - Brief bullet list of key changes (5–10 items max) + - `Fixes #N` to auto-close the issue +- **Do NOT** write marketing copy, exhaustive file lists, or before/after comparisons + +--- + +# Review & Iterate Phase + +**DO NOT yield to the user until review is complete or 8 minutes have elapsed.** + +## 1. Request Copilot Review + +After pushing and creating the PR, request review from Copilot using `github/request_copilot_review`. + +## 2. Wait for Review + +Poll for review completion: + +- Wait ~2 minutes initially +- Then poll every 30 seconds +- Maximum wait: 8 minutes total + +``` +github/pull_request_read (method: get_review_comments) → check for comments +``` + +## 3. Handle Review Comments + +If review comments exist: + +1. Read and understand each comment +2. Determine if the comment is actionable (not just informational or positive feedback) +3. Make the necessary code fixes for actionable comments +4. **Re-run the Reviewer agent on the fixes** (mandatory — same as step 4 in Development) +5. **Run pre-commit checks** (`npm run lint`, `npm run compile-tests`, `npm run unittest`) +6. **Resolve addressed review threads** using `gh` CLI: + + ```powershell + # Get thread IDs + gh api graphql -f query='{ + repository(owner: "microsoft", name: "vscode-python-environments") { + pullRequest(number: N) { + reviewThreads(first: 50) { + nodes { id isResolved } + } + } + } + }' + + # Resolve each addressed thread + gh api graphql -f query='mutation { + resolveReviewThread(input: {threadId: "THREAD_ID"}) { + thread { isResolved } + } + }' + ``` + +7. Commit the fixes: `fix: address review feedback (PR #N)` +8. Push the fixes +9. Re-request Copilot review (`github/request_copilot_review`) +10. Wait and poll again (repeat from step 2) + +## 4. Review Complete + +Review is considered complete when: + +- A new review comes back with **no actionable comments**, OR +- The PR is **Approved**, OR +- After re-requesting review, a full polling cycle (8 min) completes and `github/pull_request_read (get_review_comments)` shows **no unresolved + non-outdated threads** + +**DO NOT suggest merging** until one of these conditions is met. 
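+
+For reference, the wait-and-poll cadence from step 2 can be scripted. This is a minimal sketch only: it assumes an authenticated `gh` CLI and uses `N` as a placeholder PR number; the `github/pull_request_read` tool remains the primary interface.
+
+```powershell
+# Initial wait, then poll every 30 seconds, up to ~8 minutes total
+Start-Sleep -Seconds 120
+$deadline = (Get-Date).AddMinutes(6) # 2 of the 8 minutes already elapsed
+while ((Get-Date) -lt $deadline) {
+    # Count review comments on the PR (N is a placeholder)
+    $count = gh api repos/microsoft/vscode-python-environments/pulls/N/comments --jq 'length'
+    if ([int]$count -gt 0) { break }
+    Start-Sleep -Seconds 30
+}
+```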
+
+---
+
+# Merge & Cleanup
+
+Once review is complete and all checks pass:
+
+1. **Merge the PR:**
+
+    ```
+    github/merge_pull_request
+    ```
+
+2. **Delete the feature branch:**
+
+    ```powershell
+    git checkout main; git pull
+    git branch -d feature/issue-N
+    ```
+
+    If the branch was squash-merged and `git branch -d` complains, use `git branch -D` after verifying the work is on main.
+
+    Skip `git push origin --delete feature/issue-N` if GitHub already auto-deleted the remote branch.
+
+3. **CI triggers:** Push to main runs the full CI pipeline. Consider generating a fresh snapshot for future planning.
+
+---
+
+# On-Demand Tasks
+
+## "Check what needs work"
+
+Run the Planning Phase flow above.
+
+## "Review this code"
+
+Invoke the Reviewer agent on the specified files or current changes.
+
+## "Create an issue for X"
+
+Search for duplicates, then create a well-formatted issue with labels.
+
+## "Run tests"
+
+```powershell
+# Unit tests only
+npm run unittest
+
+# Build extension (required for smoke/E2E/integration)
+npm run compile
+
+# Smoke tests (real VS Code instance)
+npm run smoke-test
+
+# Integration tests
+npm run integration-test
+
+# E2E tests
+npm run e2e-test
+```
+
+## "Check for lint/type errors"
+
+```powershell
+# Lint check
+npm run lint
+
+# Type check
+npm run compile-tests
+```
+
+## "Generate a snapshot"
+
+```powershell
+cd analysis
+python snapshot.py --output ./analysis-snapshot.json --pretty
+```
+
+## "Build the extension"
+
+```powershell
+# Development build
+npm run compile
+
+# Production build
+npm run package
+
+# Package VSIX
+npm run vsce-package
+```
+
+---
+
+# Principles
+
+1. **Issue-first:** No code without an issue. No branch without an issue number.
+2. **Review-always:** The Reviewer agent runs before every commit. No exceptions.
+3. **Small PRs:** One issue, one branch, one focused PR. Split large work into sub-issues.
+4. **Cross-platform first:** Always consider Windows, macOS, and Linux behavior differences.
+5. **Settings precedence:** Respect VS Code's workspace folder → workspace → user order.
+6. **User decides scope:** Present options, let the user choose. Don't unilaterally decide priorities.
+7. **Ship clean:** Every merge leaves the repo better than before. No "fix later" debt without an issue.
+
+---
+
+# Critical Patterns to Enforce
+
+## Cross-Platform Paths
+
+```typescript
+// WRONG: POSIX-style path
+const envPath = homeDir + '/.venv/bin/python';
+
+// RIGHT: Use path.join
+const envPath = path.join(homeDir, '.venv', 'bin', 'python');
+```
+
+```typescript
+// WRONG: path.normalize for comparisons on Windows
+const normalized = path.normalize(fsPath);
+
+// RIGHT: Use path.resolve on BOTH sides
+const normalized = path.resolve(fsPath);
+const other = path.resolve(e.environmentPath.fsPath);
+```
+
+## Settings Precedence
+
+```typescript
+// WRONG: Missing scope
+const config = vscode.workspace.getConfiguration('python-envs');
+
+// RIGHT: Pass scope for workspace folder settings
+const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder);
+```
+
+```typescript
+// WRONG: Using get() when checking explicit values
+if (config.get('useEnvironmentsExtension')) {
+}
+
+// RIGHT: Use inspect() and check explicit values only
+const inspected = config.inspect('useEnvironmentsExtension');
+const hasExplicitValue =
+    inspected?.globalValue !== undefined ||
+    inspected?.workspaceValue !== undefined ||
+    inspected?.workspaceFolderValue !== undefined;
+```
+
+## Localization
+
+```typescript
+// WRONG: Hardcoded user message
+vscode.window.showErrorMessage('Failed to discover environments');
+
+// RIGHT: Use l10n.t()
+import * as l10n from '@vscode/l10n';
+vscode.window.showErrorMessage(l10n.t('Failed to discover environments'));
+```
+
+## Logging
+
+```typescript
+// WRONG: Using console.log
+console.log('Discovered environment:', env);
+
+// RIGHT: Use extension logging
+import { traceLog, traceVerbose } from './common/logging';
+traceLog('Discovered environment:', env.name);
+```
+
+## API Types
+
+```typescript
+// WRONG: Assuming Uri
+async runInDedicatedTerminal(terminalKey: Uri | string, ...): Promise<void> {
+    const fsPath = terminalKey.fsPath; // Crashes if string!
+
+// RIGHT: Handle both types
+async runInDedicatedTerminal(terminalKey: Uri | string, ...): Promise<void> {
+    const keyPart = terminalKey instanceof Uri
+        ? path.normalize(terminalKey.fsPath)
+        : terminalKey;
+```
+
+---
+
+# Test Guidance
+
+## Test Types
+
+| Type        | Command                    | When to Use                         |
+| ----------- | -------------------------- | ----------------------------------- |
+| Unit        | `npm run unittest`         | Quick feedback on isolated logic    |
+| Smoke       | `npm run smoke-test`       | Basic functionality in real VS Code |
+| Integration | `npm run integration-test` | Component interaction tests         |
+| E2E         | `npm run e2e-test`         | Full user workflow tests            |
+
+## Before Running Smoke/E2E/Integration Tests
+
+**CRITICAL:** These tests run against `dist/extension.js` built by webpack.
+
+```powershell
+# Must run webpack build first!
+npm run compile
+
+# Then run tests
+npm run smoke-test
+```
+
+Without `npm run compile`, tests run against stale/missing code.
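+
+To guard against that failure mode, the two steps can be chained so tests never run after a failed build. A minimal sketch; `$LASTEXITCODE` is standard PowerShell, and the script names are the ones listed above:
+
+```powershell
+# Build first; abort instead of testing a stale dist/extension.js
+npm run compile
+if ($LASTEXITCODE -ne 0) { throw 'webpack build failed; skipping tests' }
+npm run smoke-test
+```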
+ +--- + +# Manager-Specific Knowledge + +## Poetry + +- Check `POETRY_VIRTUALENVS_IN_PROJECT` env var +- Handle `{cache-dir}` placeholder in paths +- Platform-specific cache: Windows `%LOCALAPPDATA%\pypoetry\Cache`, macOS `~/Library/Caches/pypoetry` + +## PyEnv + +- Windows uses `pyenv-win` with different directory structure +- Use `path.resolve()` not `path.normalize()` for path comparisons + +## Conda + +- Fish shell uses different activation syntax +- Check Windows registry for installations + +## Pipenv + +- Check `WORKON_HOME` and `XDG_DATA_HOME` environment variables + +## Terminal Activation + +- Shell type detection for bash, zsh, fish, PowerShell, cmd +- `shellStartup` vs `command` activation patterns +- Terminal reveal timing for scripts with `input()` diff --git a/.github/agents/reviewer.agent.md b/.github/agents/reviewer.agent.md new file mode 100644 index 00000000..9290682c --- /dev/null +++ b/.github/agents/reviewer.agent.md @@ -0,0 +1,678 @@ +--- +name: reviewer +description: 'Deep code reviewer for vscode-python-environments. Catches cross-platform bugs, terminal activation issues, settings precedence problems, environment discovery failures, and API type safety issues that automated tools miss.' +tools: + [ + 'read/problems', + 'read/readFile', + 'agent', + 'github/issue_read', + 'github/list_issues', + 'github/list_pull_requests', + 'github/pull_request_read', + 'github/search_code', + 'github/search_issues', + 'github/search_pull_requests', + 'search', + 'web', + ] +--- + +# Code Reviewer + +A thorough reviewer for the vscode-python-environments extension (VS Code extension + PET native locator). Goes beyond syntax checking to catch cross-platform path bugs, terminal activation issues, settings precedence problems, and environment discovery failures. + +## Philosophy + +**Don't just check what the code does. Question how it handles cross-platform scenarios, multi-workspace configurations, and environment manager edge cases.** + +Automated reviews consistently miss: + +- Cross-platform path handling bugs (Windows vs POSIX) +- Terminal activation timing and ordering issues +- Settings precedence violations (workspace folder vs workspace vs user) +- Environment discovery edge cases (missing env variables, malformed configs) +- API type mismatches (Uri vs string parameters) +- Multi-root workspace state management +- Localization gaps in user-facing messages +- Accessibility regressions + +--- + +## Review Process + +### 1. Understand Context First + +Before reading code: + +- What issue does this change claim to fix? +- What manager is affected? (venv, conda, poetry, pipenv, pyenv) +- Does it touch terminal activation, settings, or environment discovery? +- Is this platform-specific code? + +### 2. Trace Data Flow + +Follow the flow from entry to exit: + +- Where does the path/URI come from? (user input, settings, VS Code API, PET server) +- Is it normalized correctly for cross-platform comparison? +- Does it pass through settings with proper precedence? +- Where does output go? (settings file, UI, terminal, notification) + +### 3. Question the Design + +Ask "why" at least once per significant change: + +- Why this approach over alternatives? +- What happens when the user has multiple workspace folders? +- What happens on Windows vs macOS vs Linux? +- Does this match the behavior documented in `docs/design.md`? + +### 4. 
Check Ripple Effects + +- Search for usages of changed functions/interfaces +- Consider downstream consumers (Python extension, Jupyter, third-party extensions using the API) +- Look for implicit contracts being broken (API response shapes, settings format) + +--- + +## Critical Review Areas + +### Cross-Platform Path Handling + +**CRITICAL**: This extension runs on Windows, macOS, and Linux. Path bugs are the #1 source of issues. + +```typescript +// RED FLAG: Using string concatenation for paths +const envPath = homeDir + '/.venv/bin/python'; // POSIX only! + +// REQUIRED: Use path.join() +const envPath = path.join(homeDir, '.venv', 'bin', 'python'); +``` + +```typescript +// RED FLAG: Using path.normalize() for path comparisons on Windows +const normalized = path.normalize(fsPath); +return n === normalized; // Will fail if one is '\test' and other is 'C:\test' + +// REQUIRED: Use path.resolve() on BOTH sides +const normalized = path.resolve(fsPath); +const other = path.resolve(e.environmentPath.fsPath); +return normalized === other; +``` + +**Platform-Specific Path Gotchas:** + +- **Windows**: + - pyenv-win uses `pyenv.bat`, not `pyenv` or `pyenv.exe` + - Poetry cache: `%LOCALAPPDATA%\pypoetry\Cache\virtualenvs` (NOT `~/.cache`) + - Long paths (>260 chars) may cause failures + - Mapped drives may not be accessible + - `path.resolve('\test')` → `C:\test`, but `path.normalize('\test')` → `\test` +- **macOS**: + - Homebrew has complex symlink chains + - Poetry cache: `~/Library/Caches/pypoetry/virtualenvs` + - XCode vs Command Line Tools Python +- **Linux**: + - `/bin` may be symlink to `/usr/bin` + - XDG directories: `~/.local/share/virtualenvs` for pipenv + - Poetry cache: `~/.cache/pypoetry/virtualenvs` + +### Terminal Activation + +**Terminal activation is complex and timing-sensitive:** + +```typescript +// RED FLAG: Assuming terminal is ready immediately +const terminal = vscode.window.createTerminal('Python'); +terminal.sendText('python --version'); // May execute before shell is ready! + +// REQUIRED: Use shell integration or wait for readiness +await this.waitForShellIntegration(terminal); +terminal.sendText(command); +``` + +**Activation Strategy Checklist:** + +- `shellStartup` vs `command` activation type handled correctly? +- Shell type detection working? (bash, zsh, fish, PowerShell, cmd) +- Activation script path correct for the shell type? +- Does it handle spaces and special characters in paths? +- Is the activation command timing out appropriately? +- Terminal name set correctly? (should be `Python: {filename}` not shell name) +- Terminal auto-reveal after `Run Python File`? +- Race condition between activation and script execution? 
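+
+The `waitForShellIntegration` helper in the REQUIRED example above is not part of the VS Code API. A minimal sketch of one possible implementation, assuming VS Code 1.93+ (where `onDidChangeTerminalShellIntegration` is stable) and a caller-chosen timeout:
+
+```typescript
+import * as vscode from 'vscode';
+
+// Resolves true once shell integration is ready, false on timeout.
+// Callers should fall back to plain sendText() (or warn) when this returns false.
+async function waitForShellIntegration(terminal: vscode.Terminal, timeoutMs = 5000): Promise<boolean> {
+    if (terminal.shellIntegration) {
+        return true; // Already ready
+    }
+    return new Promise<boolean>((resolve) => {
+        const timer = setTimeout(() => {
+            listener.dispose();
+            resolve(false);
+        }, timeoutMs);
+        const listener = vscode.window.onDidChangeTerminalShellIntegration((e) => {
+            if (e.terminal === terminal) {
+                clearTimeout(timer);
+                listener.dispose();
+                resolve(true);
+            }
+        });
+    });
+}
+```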
+ +**Common Terminal Issues:** + +- Shell execution timeout before activation completes +- Wrong environment shown after activation +- Newline added after shellStartup activation (fish shell) +- Terminal name showing shell type instead of Python context +- Terminal not auto-revealing when running script with input() (command mode) +- KeyboardInterrupt race with PS1 activation script (command mode) +- Git Bash on Windows: backslash paths not escaped properly + +**Shell-Specific Issues:** + +- **Fish**: `XDG_CONFIG_HOME` ignored for shellStartup, extra newline after activation +- **Bash on Windows**: Paths like `D:\path\file.py` show as `command not found` (missing escaping) +- **PowerShell**: PS1 activation races with script execution +- **cmd**: Conda `activate.bat` path quoting issues + +### Settings Precedence + +**VS Code settings have strict precedence that must be respected:** + +```typescript +// RED FLAG: Using getConfiguration() without scope +const config = vscode.workspace.getConfiguration('python-envs'); +const value = config.get('pythonProjects'); // Missing workspace context! + +// REQUIRED: Pass scope for workspace folder settings +const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder); +const value = config.get('pythonProjects'); +``` + +```typescript +// RED FLAG: Not using inspect() when checking explicit values +const config = vscode.workspace.getConfiguration('python'); +if (config.get('useEnvironmentsExtension')) { // May return defaultValue! + +// REQUIRED: Use inspect() and check explicit values only +const inspected = config.inspect('useEnvironmentsExtension'); +const hasExplicitValue = inspected?.globalValue !== undefined || + inspected?.workspaceValue !== undefined || + inspected?.workspaceFolderValue !== undefined; +``` + +**Settings Precedence Order (highest to lowest):** + +1. Workspace folder value +2. Workspace value +3. User/global value +4. Default value (may come from other extensions!) + +**Common Settings Issues:** + +- `pythonProjects` settings overwriting project-specific configs on reload +- Multi-root workspace settings missing `workspace` property +- Default values from other extensions' `package.json` being used + +### Environment Discovery + +**Environment managers have specific discovery patterns:** + +```typescript +// RED FLAG: Not handling undefined before .map() +const envs = await conda.getEnvironments(); +const names = envs.map((e) => e.name); // Crashes if envs is undefined! + +// REQUIRED: Defensive check before operations +const envs = await conda.getEnvironments(); +if (!envs) { + traceLog('No environments returned from conda'); + return []; +} +const names = envs.map((e) => e.name); +``` + +**Manager-Specific Discovery Patterns:** + +- **Poetry**: Check `POETRY_VIRTUALENVS_IN_PROJECT` env var, `poetry.toml`, `{cache-dir}` placeholder +- **Pipenv**: Check `WORKON_HOME`, `XDG_DATA_HOME/virtualenvs` +- **Pyenv**: Windows uses `pyenv-win` with different directory structure +- **Conda**: Windows registry paths, `conda-meta/` directory +- **venv**: `pyvenv.cfg` file, version extraction + +**Environment Variable Precedence (Poetry example):** + +1. Local config (`poetry.toml` in project) +2. Environment variables (`POETRY_VIRTUALENVS_IN_PROJECT`) +3. 
Global config (`config.toml`) + +**Known Environment Discovery Bugs:** + +- **Poetry**: `{cache-dir}` placeholder not resolved in paths; wrong default virtualenvs path on Windows/macOS +- **Pipenv**: Missing `WORKON_HOME` and `XDG_DATA_HOME` env var support +- **Pyenv**: Windows path calculation bug with `path.normalize()` vs `path.resolve()` +- **Conda**: Fish shell uses bash-style `source activate` instead of fish-compatible command + +### PET Server Communication + +**The native PET server (Rust) communicates via JSON-RPC. Failures are subtle:** + +```typescript +// RED FLAG: No timeout on JSON-RPC calls +const envs = await petServer.getEnvironments(); // Can hang indefinitely! + +// REQUIRED: Implement timeout with fallback +const envs = await Promise.race([ + petServer.getEnvironments(), + new Promise((_, reject) => setTimeout(() => reject(new Error('PET server timeout')), 30000)), +]); +``` + +**PET Server Issues from Past Bugs:** + +- No timeout on JSON-RPC calls → discovery stuck indefinitely +- Spawn errors continue silently (extension continues without environments) +- Worker pool and disposables not cleaned up (resource leaks) +- Type guards missing for JSON-RPC responses +- `JSON.stringify()` used for object comparison (inefficient and fragile) +- Cache key collision when paths normalize to same value + +**PET Server Checklist:** + +- JSON-RPC calls have timeout? +- Spawn errors surfaced to user or logged? +- Resources disposed on extension deactivation? +- Response types validated before use? +- Cache invalidation on environment changes? + +### API Type Safety + +**The public API must handle all documented input types:** + +```typescript +// RED FLAG: Assuming Uri when API accepts Uri | string +async runInDedicatedTerminal(terminalKey: Uri | string, ...): Promise { + const fsPath = terminalKey.fsPath; // Crashes when string is passed! + +// REQUIRED: Handle both types +async runInDedicatedTerminal(terminalKey: Uri | string, ...): Promise { + const keyPart = terminalKey instanceof Uri + ? path.normalize(terminalKey.fsPath) + : terminalKey; +``` + +**API Checklist:** + +- All union types handled (`Uri | string`, etc.) +- `envId` vs `id` - use `env.envId.id`, not `env.id` +- API is flat: `api.getEnvironments()`, NOT `api.environments.getEnvironments()` +- Return types match documentation + +### Multi-Workspace Support + +**Multi-root workspaces require special handling:** + +```typescript +// RED FLAG: Assuming single workspace folder +const workspaceFolder = vscode.workspace.workspaceFolders?.[0]; + +// REQUIRED: Handle multiple workspace folders or find correct one +const workspaceFolder = vscode.workspace.getWorkspaceFolder(documentUri) ?? 
vscode.workspace.workspaceFolders?.[0]; +``` + +**Multi-Workspace Checklist:** + +- `pythonProjects` setting includes `workspace` property for multi-root +- Environment selection per-project, not per-workspace +- Terminal activation respects project-level settings +- Settings changes don't override other projects' configurations +- Status bar shows correct environment for active file's folder +- Debugger uses correct environment for the file being debugged (not root workspace) + +**Common Multi-Workspace Bugs:** + +- Venv for first workspace overwritten by default selection +- Debugger uses root workspace's environment instead of file's workspace +- Status bar not updating when switching between workspace folders +- Terminal environment contributions use last activated environment for all folders +- Non-Python projects in workspace still get Python activation (Vue, Electron) + +### Interpreter Selection Persistence + +**Interpreter selections must persist correctly across restarts:** + +```typescript +// RED FLAG: Writing to settings without checking previous state +await config.update('pythonPath', newPath, ConfigurationTarget.Workspace); // Overwrites user's explicit choice! + +// REQUIRED: Check for explicit user values first +const inspected = config.inspect('pythonPath'); +if (inspected?.workspaceValue === undefined) { + // Only set if user hasn't explicitly chosen + await config.update('pythonPath', newPath, ConfigurationTarget.Workspace); +} +``` + +**Persistence Issues from Past Bugs:** + +- Interpreter selection not saved, resets to system interpreter on restart +- `python.defaultInterpreterPath` overwritten on first startup +- Environment selection in status bar doesn't match actual active environment +- `.venv` in workspace not listed in interpreter picker (must browse manually) +- Interpreter set via "Enter path..." not persisted + +### Extension Compatibility + +**The extension must coexist with other Python tooling:** + +```typescript +// RED FLAG: Assuming exclusive control of terminal activation +terminal.shellIntegration.executeCommand('activate'); // May conflict with other extensions! 
+ +// REQUIRED: Check for conflicts and coordinate +if (!this.hasExternalActivation(terminal)) { + terminal.shellIntegration.executeCommand('activate'); +} +``` + +**Compatibility Issues:** + +- Auto-installed when `ms-python.python` is installed (devcontainer users report breakage) +- Conflicts with python-envy extension (Don's Manager) +- Overwrites interpreter path set by other extensions +- "Go to Definition" breaks for some users after installation +- Extension assumes it's the only environment manager + +**Devcontainer/WSL-Specific Issues:** + +- Extension auto-installs in devcontainer even when not wanted +- WSL paths not translated correctly +- Remote extension host has different environment than expected + +### Localization + +**All user-facing messages must use VS Code's l10n API:** + +```typescript +// RED FLAG: Hardcoded user-facing string +vscode.window.showErrorMessage('Failed to discover environments'); + +// REQUIRED: Use l10n.t() for localization +import * as l10n from '@vscode/l10n'; +vscode.window.showErrorMessage(l10n.t('Failed to discover environments')); +``` + +**Localization Rules:** + +- User-facing messages: MUST use `l10n.t()` +- Log messages: Do NOT need localization +- Error buttons/actions: MUST use `l10n.t()` +- Command palette entries: Use `package.nls.json` + +### Logging + +**Use the extension's logging utilities:** + +```typescript +// RED FLAG: Using console.log +console.log('Discovered environment:', env); + +// REQUIRED: Use extension logging utilities +import { traceLog, traceVerbose } from './common/logging'; +traceLog('Discovered environment:', env.name); +traceVerbose('Environment details:', JSON.stringify(env)); +``` + +### Accessibility + +**UI components must be accessible:** + +```typescript +// RED FLAG: Missing accessibility labels +const treeItem = new vscode.TreeItem('Create Environment'); +treeItem.command = ...; + +// REQUIRED: Add accessibility description +const treeItem = new vscode.TreeItem('Create Environment'); +treeItem.accessibilityInformation = { + label: l10n.t('Create a new Python environment'), + role: 'button' +}; +``` + +**Accessibility Checklist (WCAG 2.1 AA):** + +- Tree view items have accessibility labels +- Icon-only buttons have ARIA labels +- Screen reader announces state changes +- Focus management in quick picks and tree views + +### Project Identification + +**Python project detection must be precise:** + +```typescript +// RED FLAG: Assuming any folder with Python files is a Python project +if (await hasFileWithExtension(folder, '.py')) { + registerAsPythonProject(folder); // Could be Vue/Electron project with scripts! + +// REQUIRED: Check for Python project markers +const hasPyprojectToml = await fileExists(path.join(folder, 'pyproject.toml')); +const hasRequirements = await fileExists(path.join(folder, 'requirements.txt')); +const hasSetupPy = await fileExists(path.join(folder, 'setup.py')); +if (hasPyprojectToml || hasRequirements || hasSetupPy) { + registerAsPythonProject(folder); +} +``` + +--- + +## Higher-Order Thinking + +### The "What If" Questions + +- What if the user has 500+ environments? +- What if conda returns malformed JSON? +- What if the `pyvenv.cfg` file exists but has no version field? +- What if the path has spaces, Unicode characters, or is on a mapped drive? +- What if two environment managers claim the same environment? +- What if the terminal is opened before shell integration is ready? +- What if the user changes settings while discovery is in progress? 
+- What if this runs in a devcontainer or WSL? +- What if the PET server process crashes silently? +- What if JSON-RPC hangs indefinitely with no timeout? +- What if poetry.toml has `{cache-dir}` placeholder that isn't resolved? +- What if the user has `WORKON_HOME` or `XDG_DATA_HOME` set? +- What if the shell is fish and uses bash-style activation command? +- What if the terminal needs user input but hasn't auto-revealed? +- What if the user has both python-envy and this extension installed? +- What if discovery takes 30+ seconds and user opens interpreter picker? +- What if the user manually entered interpreter path via "Enter path..."? +- What if the file being debugged is outside all workspace folders? +- What if a non-Python project (Vue/Electron) is in a multi-root workspace? + +### The "Who Else" Questions + +- Who else calls this function? +- Does the Python extension depend on this API response shape? +- Does Jupyter use this environment information? +- Are there third-party extensions using the API? +- Does the PET server expect this format? + +### The "Why Not" Questions + +- Why not use `path.resolve()` instead of `path.normalize()`? +- Why not use `config.inspect()` to check explicit values? +- Why not add a null check before `.map()`? +- Why not localize this user-facing message? +- Why not add an accessibility label to this component? + +--- + +## Blind Spots to Actively Check + +| What Gets Scrutinized | What Slips Through | +| ------------------------- | ------------------------------------ | +| Syntax and types | Cross-platform path handling | +| Test existence | Test coverage on all platforms | +| Individual manager logic | Cross-manager interactions | +| Happy path settings | Settings precedence edge cases | +| Single workspace behavior | Multi-root workspace handling | +| English UI text | L10n for non-English locales | +| Visual appearance | Screen reader accessibility | +| Uri parameters | Uri \| string union type handling | +| Environment discovery | Environment variable precedence | +| Terminal activation | Terminal reveal timing for input() | +| PET server calls | JSON-RPC timeouts and error handling | +| Extension activation | Extension compatibility with others | +| Happy path shells | Fish, Git Bash on Windows edge cases | +| Initial environment setup | Persistence across restarts | +| Workspace folder settings | Files outside all workspace folders | + +### Things Rarely Questioned + +1. **path.normalize() vs path.resolve()** — Windows needs resolve() for drive letters +2. **Settings scope parameter** — Missing scope causes workspaceFolderValue to be undefined +3. **inspect() vs get()** — get() returns defaultValue from other extensions +4. **Terminal activation timing** — Commands may execute before shell is ready +5. **Environment variable precedence** — POETRY_VIRTUALENVS_IN_PROJECT, WORKON_HOME +6. **Multi-root workspace property** — pythonProjects needs `workspace` field +7. **Defensive null checks** — .map() on undefined crashes extensions +8. **Platform-specific cache paths** — Different defaults on Windows/macOS/Linux +9. **Fish shell differences** — XDG_CONFIG_HOME, different activation syntax +10. **PET server timeouts** — JSON-RPC calls can hang forever +11. **Resource cleanup** — Worker pools, disposables must be cleaned up +12. 
**Terminal reveal timing** — Must reveal before script execution for input() + +### Known Regression Triggers + +**These specific areas have caused regressions in the past:** + +| Area | Recent Issue | Pattern | +| -------------------------------- | ------------ | -------------------------------------------------- | +| Interpreter selection status bar | #1124 | Selection not updating when switching environments | +| Terminal auto-reveal | #1128 | Terminal doesn't show until script completes | +| `defaultInterpreterPath` | #1082 | Overwritten on first startup | +| Discovery stuck | #1081, #1140 | No timeout on JSON-RPC, PET spawn errors silent | +| Settings reload | #1200 | Root project overrides project configs | +| Multi-root Pixi | #1107 | Wrong environment activated in terminal | +| Poetry virtualenvs path | #1182 | Wrong default path on Windows/macOS | +| PyEnv Windows | #1180 | `path.normalize()` vs `path.resolve()` | +| Bash on Windows | #1123 | Path backslashes not escaped | + +**Before merging changes to these areas, run manual tests on:** + +- Windows (cmd, PowerShell, Git Bash) +- macOS (zsh, bash) +- Linux (bash, fish) + +--- + +## Output Format + +```markdown +## Review Findings + +### Critical (Blocks Merge) + +Cross-platform path bugs causing crashes, API type safety violations, settings precedence bugs overwriting user configs, terminal activation breaking core workflows + +### Important (Should Fix) + +Missing null checks, localization gaps, accessibility regressions, environment discovery edge cases, multi-workspace handling issues + +### Suggestions (Consider) + +Logging improvements, code clarity, test coverage gaps, performance opportunities, better error messages + +### Questions (Need Answers) + +Design decisions that need justification before proceeding +``` + +If clean: `## Review Complete — LGTM` + +--- + +## Instructions + +1. Get list of changed files: + ```bash + git diff --name-only HEAD # Uncommitted changes + git diff --name-only origin/main # All changes vs main branch + ``` +2. Understand the context (what issue, what manager, what platform concerns) +3. Read each changed file and related code +4. Check if changes touch settings, paths, terminal, or API surfaces +5. Apply thinking questions, not just checklist +6. Report findings with file:line references + +## Before Approving, Run These Commands + +```bash +# Type check +npm run compile-tests + +# Lint check +npm run lint + +# Build extension (required for E2E tests) +npm run compile + +# Run unit tests +npm run unittest +``` + +## Critical Scenarios to Test Manually + +For changes to high-risk files, test these scenarios: + +**Terminal Activation Changes:** + +1. Open terminal with shellStartup activation → verify environment active +2. Open terminal with command activation → verify environment active +3. Run script with `input()` → verify terminal auto-reveals +4. Test on fish shell → verify no syntax errors +5. Test on Git Bash (Windows) → verify path escaping works + +**Settings Changes:** + +1. Set interpreter at workspace folder level → restart → verify persisted +2. Set interpreter in multi-root workspace → verify correct per-folder +3. Change `python.defaultInterpreterPath` → verify not overwritten + +**Environment Discovery Changes:** + +1. Create poetry project with `{cache-dir}` in config → verify resolved +2. Set `WORKON_HOME` env var → verify pipenv environments found +3. Test with 50+ environments → verify no performance regression +4. 
Test with malformed config files → verify graceful handling + +**Multi-Workspace Changes:** + +1. Open workspace with Python and non-Python projects → verify non-Python not registered +2. Debug file in nested workspace folder → verify correct interpreter used +3. Switch between folders → verify status bar updates + +## Don't Be Afraid To + +- **Ask "dumb" questions** — Path handling bugs are subtle and common +- **Question the platform assumptions** — Windows behaves differently +- **Flag missing null checks** — `.map()` on undefined has caused real crashes +- **Challenge settings usage** — Precedence bugs corrupt user configurations +- **Request multi-root testing** — Single-workspace works ≠ multi-root works +- **Check localization** — User-facing strings need l10n.t() +- **Question terminal timing** — Race conditions are hard to reproduce +- **Ask about PET server error handling** — Silent failures cause discovery stuck +- **Question environment variable support** — Poetry/Pipenv use env vars for config +- **Check fish shell handling** — Different syntax than bash/zsh +- **Flag missing timeouts** — JSON-RPC calls can hang forever +- **Question persistence logic** — Settings may be overwritten unexpectedly + +## High-Risk Files (Extra Scrutiny) + +Based on past issues, these files have the highest bug density: + +| File Path | Risk Area | Common Issues | +| ------------------------------------------- | --------------------- | --------------------------------------------------------------------------- | +| `src/managers/common/nativePythonFinder.ts` | Environment discovery | Type guards, JSON.stringify comparison, cache key collision, resource leaks | +| `src/features/terminal/terminalManager.ts` | Terminal activation | Timing issues, shell detection, reveal logic | +| `src/managers/poetry/poetryUtils.ts` | Poetry manager | {cache-dir} placeholder, env var precedence, platform-specific paths | +| `src/managers/pyenv/pyenvUtils.ts` | PyEnv manager | Windows path calculation, path.resolve() vs path.normalize() | +| `src/managers/pipenv/pipenvUtils.ts` | Pipenv manager | WORKON_HOME, XDG_DATA_HOME env var support | +| `src/features/settings/*.ts` | Settings | Precedence, inspect() vs get(), scope parameter | +| `src/managers/conda/condaUtils.ts` | Conda manager | Fish shell activation, registry paths | +| `src/features/interpreterSelection.ts` | Interpreter selection | Persistence, status bar updates, multi-workspace | + +## Skip (Handled Elsewhere) + +- Style/formatting → ESLint + Prettier +- Type errors → TypeScript compiler +- Test failures → CI / GitHub Actions +- Spelling → not your job +- PET server Rust code → separate repository From fcca3e8d61ab576f491b49e047e7447959f9a173 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 11:30:15 -0800 Subject: [PATCH 3/7] add skills --- .github/agents/maintainer.agent.md | 546 ++++-------------- .github/agents/reviewer.agent.md | 12 + .github/hooks/maintainer-hooks.json | 50 ++ .github/hooks/scripts/README.md | 51 ++ .github/hooks/scripts/post_tool_use.py | 131 +++++ .github/hooks/scripts/session_start.py | 178 ++++++ .github/hooks/scripts/stop_hook.py | 130 +++++ .github/hooks/scripts/subagent_stop.py | 44 ++ .github/skills/cross-platform-paths/SKILL.md | 203 +++++++ .github/skills/generate-snapshot/SKILL.md | 141 +++++ .github/skills/manager-discovery/SKILL.md | 329 +++++++++++ .github/skills/run-pre-commit-checks/SKILL.md | 132 +++++ .github/skills/settings-precedence/SKILL.md | 260 +++++++++ 13 files changed, 1776 
insertions(+), 431 deletions(-) create mode 100644 .github/hooks/maintainer-hooks.json create mode 100644 .github/hooks/scripts/README.md create mode 100644 .github/hooks/scripts/post_tool_use.py create mode 100644 .github/hooks/scripts/session_start.py create mode 100644 .github/hooks/scripts/stop_hook.py create mode 100644 .github/hooks/scripts/subagent_stop.py create mode 100644 .github/skills/cross-platform-paths/SKILL.md create mode 100644 .github/skills/generate-snapshot/SKILL.md create mode 100644 .github/skills/manager-discovery/SKILL.md create mode 100644 .github/skills/run-pre-commit-checks/SKILL.md create mode 100644 .github/skills/settings-precedence/SKILL.md diff --git a/.github/agents/maintainer.agent.md b/.github/agents/maintainer.agent.md index 88275279..7ed54c54 100644 --- a/.github/agents/maintainer.agent.md +++ b/.github/agents/maintainer.agent.md @@ -52,45 +52,49 @@ tools: **vscode-python-environments** — A VS Code extension providing a unified Python environment experience. Manages environment discovery, creation, selection, terminal activation, and package management across multiple Python managers. -**Stack:** - -- **Language:** TypeScript (Node.js, VS Code Extension API) -- **Build:** Webpack + TypeScript compiler -- **Test Framework:** Mocha + Sinon for unit tests, @vscode/test-cli for E2E/integration -- **Environment Managers Supported:** venv, conda, poetry, pipenv, pyenv, pixi, uv -- **Native Component:** PET (Python Environment Tools) — Rust-based locator server for fast environment discovery +**Stack:** TypeScript, Node.js, VS Code Extension API, Webpack, Mocha/Sinon, PET (Rust locator) **Key Architecture:** -- `src/managers/` — Environment manager implementations (one folder per manager) -- `src/features/` — Core features (terminal activation, settings, views, execution) -- `src/common/` — Shared utilities, APIs, and error handling -- `src/api.ts` — Public API surface for other extensions +- `src/managers/` — Environment manager implementations (venv, conda, poetry, pipenv, pyenv, pixi, uv) +- `src/features/` — Core features (terminal, settings, views, execution) +- `src/common/` — Shared utilities and APIs - `analysis/` — Python scripts for codebase health snapshots -**Critical Guidelines (from `.github/instructions/generic.instructions.md`):** +--- -- **Cross-platform paths:** Use `path.join()` or `path.resolve()`, never hardcode POSIX paths -- **Settings precedence:** Always pass scope to `getConfiguration()`, use `inspect()` for explicit values -- **Localization:** All user-facing messages use `l10n.t()`, logs don't need localization -- **Logging:** Use `traceLog`/`traceVerbose`, never `console.log` +# Available Skills -**CLI tools:** +Load these skills on-demand for detailed knowledge: -- `npm` for build, test, lint -- `gh` CLI for GitHub interactions -- `python` for running analysis scripts +| Skill | When to Use | +| ------------------------ | ------------------------------------------------- | +| `/generate-snapshot` | Generate codebase health snapshot for planning | +| `/run-pre-commit-checks` | Run mandatory checks before committing | +| `/cross-platform-paths` | Reviewing/writing path-related code | +| `/settings-precedence` | Reviewing/writing settings code | +| `/manager-discovery` | Working on specific manager (poetry, conda, etc.) 
| + +--- + +# Automated Hooks + +These hooks run automatically (configured in `.github/hooks/`): + +| Hook | What it Does | +| ---------------- | ---------------------------------------------------------------- | +| **SessionStart** | Injects git context, open issues, available skills | +| **PostToolUse** | Runs ESLint on edited TypeScript files | +| **Stop** | Blocks if uncommitted TS changes exist without pre-commit checks | --- # Workflow Overview ``` -CI (snapshot) → Planning → Development → Review → Merge +Planning → Development → Review → Merge ``` -All work follows this loop. No shortcuts. - --- # Planning Phase @@ -99,89 +103,20 @@ All work follows this loop. No shortcuts. 1. **Gather context:** - Check open GitHub issues (`github/list_issues`, `github/search_issues`) - - Review labeled issues (bugs, enhancements, manager-specific) - - Generate or download the latest codebase snapshot (see Snapshot Usage below) + - Generate snapshot: use `/generate-snapshot` skill for details - Check open PRs for related work 2. **Analyze and prioritize:** - - Cross-reference open issues against snapshot hotspots and debt indicators - - Identify bugs vs enhancements vs chores - - Consider cross-platform impact (changes affecting Windows, macOS, Linux) - - Factor in manager dependencies (changes to `common/` affect many managers) + - Cross-reference issues against snapshot `priority_hotspots` and `debt_indicators` + - Consider cross-platform impact (Windows, macOS, Linux) - Use snapshot data to identify refactoring opportunities 3. **Present a curated priority list:** - - Show 3–5 actionable work items ranked by impact and readiness - - For each item: brief description, affected components, estimated complexity + - 3–5 actionable items ranked by impact + - For each: brief description, affected components, complexity - Recommend the top pick with reasoning -4. **User picks a work item** → proceed to Development Phase - -### Snapshot Usage - -The codebase snapshot is generated by the analysis scripts in `analysis/`. Run it locally to get current metrics. - -**To generate a snapshot:** - -```powershell -# Ensure Python is available -cd analysis -python snapshot.py --output ./analysis-snapshot.json -``` - -**Snapshot structure** (`analysis-snapshot.json`): - -```json -{ - "metadata": { - "schema_version": "1.0.0", - "generated_at": "ISO timestamp", - "git": { "sha": "...", "branch": "...", "message": "..." } - }, - "summary": { - "files_with_changes": N, - "total_changes": N, - "total_churn": N, - "high_complexity_files": N, - "todo_count": N, - "fixme_count": N, - "circular_dependency_count": N, - "single_author_file_ratio": 0.0 - }, - "priority_hotspots": [ - { "path": "...", "change_count": N, "churn": N, "max_complexity": N, "priority_score": N } - ], - "git_analysis": { - "hotspots": [...], - "temporal_coupling": [...], - "bus_factor": { "total_authors": N, "knowledge_silos": [...] } - }, - "complexity": { - "by_language": { "typescript": [...], "python": [...] }, - "high_complexity_functions": [...] - }, - "debt_indicators": { - "debt_markers": { "by_type": { "TODO": [...], "FIXME": [...] } }, - "large_files": { "files": [...] }, - "long_functions": { "functions": [...] } - }, - "dependencies": { - "circular_dependencies": [...], - "highly_coupled_modules": [...], - "hub_modules": [...], - "layer_violations": [...] 
- } -} -``` - -**How to interpret:** - -- **priority_hotspots** (sorted by priority_score desc): Files that change often AND are complex — prime refactoring candidates -- **temporal_coupling**: File pairs with high coupling ratio indicate hidden dependencies -- **knowledge_silos**: Files with single author — bus factor risk -- **debt_markers.by_type.FIXME**: High-priority technical debt -- **circular_dependencies**: Architectural issues requiring attention -- **hub_modules**: High fan-in modules — changes here affect many files +4. **User picks** → proceed to Development Phase --- @@ -192,8 +127,7 @@ python snapshot.py --output ./analysis-snapshot.json Every piece of work starts with a GitHub issue — no exceptions. - Search for duplicates first (`github/search_issues`) -- Create the issue with a clear title, description, and labels -- Link to relevant manager or feature area if applicable +- Create issue with clear title, description, and labels ## 2. Create a Feature Branch @@ -202,77 +136,51 @@ git checkout main; git pull git checkout -b feature/issue-N # or bug/issue-N, chore/issue-N ``` -Use the issue number in the branch name for traceability. - ## 3. Implement Changes -- Follow TypeScript conventions and the project's patterns -- Write/update tests alongside code -- Keep changes focused — one issue per branch -- Follow the critical guidelines from `.github/instructions/generic.instructions.md` +Follow guidelines from `.github/instructions/generic.instructions.md`: -### Code Conventions +- **Paths:** Use `/cross-platform-paths` skill for patterns +- **Settings:** Use `/settings-precedence` skill for patterns +- **Managers:** Use `/manager-discovery` skill for manager-specific knowledge +- **Localization:** `l10n.t()` for user-facing messages +- **Logging:** `traceLog`/`traceVerbose`, never `console.log` -- **Cross-platform paths:** Use `path.join()`, `path.resolve()`, never concatenate with `/` -- **Settings:** Always pass scope to `getConfiguration()`, use `inspect()` for explicit value checks -- **Localization:** `l10n.t()` for user-facing messages, no localization for logs -- **Logging:** Use `traceLog`, `traceVerbose` — never `console.log` -- **API types:** Handle union types (`Uri | string`), use `env.envId.id` not `env.id` -- **Thread safety:** Be careful with async operations, avoid race conditions -- **Tests:** Follow patterns in `src/test/`, use mocks from `src/test/mocks/` +### High-Risk Areas -### High-Risk Areas (Extra Scrutiny Required) +These areas require extra scrutiny: -Based on past issues, these areas have high bug density: - -| File Area | Risk | Common Issues | -| ------------------------------------------- | --------------------- | -------------------------------------------- | -| `src/managers/common/nativePythonFinder.ts` | Environment discovery | Type guards, cache issues, resource leaks | -| `src/features/terminal/` | Terminal activation | Timing issues, shell detection, reveal logic | -| `src/managers/poetry/` | Poetry manager | Cache-dir placeholder, env var precedence | -| `src/managers/pyenv/` | PyEnv manager | Windows path calculation | -| `src/features/settings/` | Settings | Precedence, inspect() vs get() | -| `src/features/interpreterSelection.ts` | Interpreter selection | Persistence, multi-workspace | +| Area | Common Issues | +| ------------------------------------------- | ------------------------------------- | +| `src/managers/common/nativePythonFinder.ts` | Type guards, cache, resource leaks | +| `src/features/terminal/` | Timing, shell 
detection, reveal logic | +| `src/managers/poetry/` | {cache-dir} placeholder, env vars | +| `src/managers/pyenv/` | Windows path calculation | +| `src/features/settings/` | Precedence, inspect() vs get() | ## 4. Self-Review (MANDATORY) **Before every commit, invoke the Reviewer agent as a sub-agent.** -This is non-negotiable. Every code change must pass through the Reviewer agent before being committed. - -### How to invoke: +Run **Reviewer** agent (`.github/agents/reviewer.agent.md`) with: -Run the **Reviewer** agent (`.github/agents/reviewer.agent.md`) as a sub-agent with instructions to: +1. Get changed files: `git diff --name-only` +2. Read and review each file +3. Report: Critical / Important / Suggestions / Questions -1. Get the list of changed files (`git diff --name-only`) -2. Read and review each changed file -3. Report findings in the standard format (Critical / Important / Suggestions / Questions) - -### Handling Reviewer findings: - -- **Critical:** Must fix before committing. Fix the issue, then re-run the Reviewer agent. -- **Important:** Should fix before committing. Fix and re-run. -- **Suggestions:** Apply if reasonable, note if deferred. -- **Questions:** Answer them — if you can't justify the decision, reconsider it. - -**Loop until the Reviewer agent returns clean or only has minor suggestions.** +**Loop until Reviewer returns clean or only minor suggestions.** ## 5. Pre-Commit Checks (REQUIRED) -Before committing, always run: +Use `/run-pre-commit-checks` skill for details. Quick reference: ```powershell -# Lint check (required) -npm run lint - -# Type check (required) -npm run compile-tests - -# Run unit tests (required) -npm run unittest +npm run lint # ESLint +npm run compile-tests # TypeScript +npm run unittest # Mocha ``` -If lint or tests fail, fix them before committing. +**Note:** The `PostToolUse` hook automatically runs ESLint on edited files. ## 6. Commit @@ -280,25 +188,16 @@ Format: `[type]: brief description (Fixes #N)` Types: `feat`, `fix`, `chore`, `docs`, `refactor`, `test` -```powershell -git add -A -git commit -m "feat: add pyenv-win support for Windows (Fixes #42)" -``` - ## 7. Push & Create PR ```powershell git push -u origin feature/issue-N ``` -Create a PR via `github/create_pull_request`: +Create PR via `github/create_pull_request`: -- **Title:** Same as commit message (or summarized if multiple commits) -- **Body:** Keep it concise: - - 1–2 sentence summary of what and why - - Brief bullet list of key changes (5–10 items max) - - `Fixes #N` to auto-close the issue -- **Do NOT** write marketing copy, exhaustive file lists, or before/after comparisons +- **Title:** Same as commit message +- **Body:** 1-2 sentence summary + bullet list (5-10 items) + `Fixes #N` --- @@ -308,303 +207,88 @@ Create a PR via `github/create_pull_request`: ## 1. Request Copilot Review -After pushing and creating the PR, request review from Copilot using `github/request_copilot_review`. +After creating PR: `github/request_copilot_review` ## 2. Wait for Review -Poll for review completion: - -- Wait ~2 minutes initially -- Then poll every 30 seconds +- Wait ~2 minutes initially, then poll every 30 seconds - Maximum wait: 8 minutes total - -``` -github/pull_request_read (method: get_review_comments) → check for comments -``` +- Use: `github/pull_request_read (method: get_review_comments)` ## 3. Handle Review Comments -If review comments exist: - 1. Read and understand each comment -2. Determine if the comment is actionable (not just informational or positive feedback) -3. 
Make the necessary code fixes for actionable comments -4. **Re-run the Reviewer agent on the fixes** (mandatory — same as step 4 in Development) -5. **Run pre-commit checks** (`npm run lint`, `npm run compile-tests`, `npm run unittest`) -6. **Resolve addressed review threads** using `gh` CLI: +2. Make fixes for actionable comments +3. **Re-run Reviewer agent on fixes** (mandatory) +4. **Run pre-commit checks** +5. Resolve addressed threads: - ```powershell - # Get thread IDs - gh api graphql -f query='{ - repository(owner: "microsoft", name: "vscode-python-environments") { - pullRequest(number: N) { - reviewThreads(first: 50) { - nodes { id isResolved } - } - } - } - }' - - # Resolve each addressed thread - gh api graphql -f query='mutation { - resolveReviewThread(input: {threadId: "THREAD_ID"}) { - thread { isResolved } - } - }' - ``` +```powershell +# Get thread IDs +gh api graphql -f query='{ + repository(owner: "microsoft", name: "vscode-python-environments") { + pullRequest(number: N) { + reviewThreads(first: 50) { nodes { id isResolved } } + } + } +}' -7. Commit the fixes: `fix: address review feedback (PR #N)` -8. Push the fixes -9. Re-request Copilot review (`github/request_copilot_review`) -10. Wait and poll again (repeat from step 2) +# Resolve each thread +gh api graphql -f query='mutation { + resolveReviewThread(input: {threadId: "THREAD_ID"}) { + thread { isResolved } + } +}' +``` + +6. Commit: `fix: address review feedback (PR #N)` +7. Push and re-request Copilot review +8. Repeat from step 2 ## 4. Review Complete -Review is considered complete when: +Review complete when: -- A new review comes back with **no actionable comments**, OR -- The PR is **Approved**, OR -- After re-requesting review, a full polling cycle (8 min) completes and `github/pull_request_read (get_review_comments)` shows **no unresolved + non-outdated threads** +- No actionable comments, OR +- PR is Approved, OR +- 8 min polling with no unresolved threads -**DO NOT suggest merging** until one of these conditions is met. +**DO NOT suggest merging** until one condition is met. --- # Merge & Cleanup -Once review is complete and all checks pass: - -1. **Merge the PR:** - - ``` - github/merge_pull_request - ``` - -2. **Delete the feature branch:** - +1. **Merge:** `github/merge_pull_request` +2. **Delete branch:** ```powershell git checkout main; git pull git branch -d feature/issue-N ``` - - If the branch was squash-merged and `git branch -d` complains, use `git branch -D` after verifying the work is on main. - - Skip `git push origin --delete ` if GitHub already auto-deleted the remote branch. - -3. **CI triggers:** Push to main runs the full CI pipeline. Consider generating a fresh snapshot for future planning. - ---- - -# On-Demand Tasks - -## "Check what needs work" - -Run the Planning Phase flow above. - -## "Review this code" - -Invoke the Reviewer agent on the specified files or current changes. - -## "Create an issue for X" - -Search for duplicates, then create a well-formatted issue with labels. 
- -## "Run tests" - -```powershell -# Unit tests only -npm run unittest - -# Build extension (required for smoke/E2E/integration) -npm run compile - -# Smoke tests (real VS Code instance) -npm run smoke-test - -# Integration tests -npm run integration-test - -# E2E tests -npm run e2e-test -``` - -## "Check for lint/type errors" - -```powershell -# Lint check -npm run lint - -# Type check -npm run compile-tests -``` - -## "Generate a snapshot" - -```powershell -cd analysis -python snapshot.py --output ./analysis-snapshot.json --pretty -``` - -## "Build the extension" - -```powershell -# Development build -npm run compile - -# Production build -npm run package - -# Package VSIX -npm run vsce-package -``` +3. **CI triggers** on push to main --- # Principles -1. **Issue-first:** No code without an issue. No branch without an issue number. -2. **Review-always:** The Reviewer agent runs before every commit. No exceptions. -3. **Small PRs:** One issue, one branch, one focused PR. Split large work into sub-issues. -4. **Cross-platform first:** Always consider Windows, macOS, and Linux behavior differences. -5. **Settings precedence:** Respect VS Code's workspace folder → workspace → user order. -6. **User decides scope:** Present options, let the user choose. Don't unilaterally decide priorities. -7. **Ship clean:** Every merge leaves the repo better than before. No "fix later" debt without an issue. - ---- - -# Critical Patterns to Enforce - -## Cross-Platform Paths - -```typescript -// WRONG: POSIX-style path -const envPath = homeDir + '/.venv/bin/python'; - -// RIGHT: Use path.join -const envPath = path.join(homeDir, '.venv', 'bin', 'python'); -``` - -```typescript -// WRONG: path.normalize for comparisons on Windows -const normalized = path.normalize(fsPath); - -// RIGHT: Use path.resolve on BOTH sides -const normalized = path.resolve(fsPath); -const other = path.resolve(e.environmentPath.fsPath); -``` - -## Settings Precedence - -```typescript -// WRONG: Missing scope -const config = vscode.workspace.getConfiguration('python-envs'); - -// RIGHT: Pass scope for workspace folder settings -const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder); -``` - -```typescript -// WRONG: Using get() when checking explicit values -if (config.get('useEnvironmentsExtension')) { -} - -// RIGHT: Use inspect() and check explicit values only -const inspected = config.inspect('useEnvironmentsExtension'); -const hasExplicitValue = - inspected?.globalValue !== undefined || - inspected?.workspaceValue !== undefined || - inspected?.workspaceFolderValue !== undefined; -``` - -## Localization - -```typescript -// WRONG: Hardcoded user message -vscode.window.showErrorMessage('Failed to discover environments'); - -// RIGHT: Use l10n.t() -import * as l10n from '@vscode/l10n'; -vscode.window.showErrorMessage(l10n.t('Failed to discover environments')); -``` - -## Logging - -```typescript -// WRONG: Using console.log -console.log('Discovered environment:', env); - -// RIGHT: Use extension logging -import { traceLog, traceVerbose } from './common/logging'; -traceLog('Discovered environment:', env.name); -``` - -## API Types - -```typescript -// WRONG: Assuming Uri -async runInDedicatedTerminal(terminalKey: Uri | string, ...): Promise { - const fsPath = terminalKey.fsPath; // Crashes if string! - -// RIGHT: Handle both types -async runInDedicatedTerminal(terminalKey: Uri | string, ...): Promise { - const keyPart = terminalKey instanceof Uri - ? 
path.normalize(terminalKey.fsPath) - : terminalKey; -``` +1. **Issue-first:** No code without an issue +2. **Review-always:** Reviewer agent before every commit +3. **Small PRs:** One issue, one branch, one PR +4. **Cross-platform:** Consider Windows, macOS, Linux +5. **Settings precedence:** workspace folder → workspace → user +6. **User decides:** Present options, let user choose +7. **Ship clean:** Every merge improves the repo --- -# Test Guidance - -## Test Types - -| Type | Command | When to Use | -| ----------- | -------------------------- | ----------------------------------- | -| Unit | `npm run unittest` | Quick feedback on isolated logic | -| Smoke | `npm run smoke-test` | Basic functionality in real VS Code | -| Integration | `npm run integration-test` | Component interaction tests | -| E2E | `npm run e2e-test` | Full user workflow tests | - -## Before Running Smoke/E2E/Integration Tests - -**CRITICAL:** These tests run against `dist/extension.js` built by webpack. - -```powershell -# Must run webpack build first! -npm run compile - -# Then run tests -npm run smoke-test -``` - -Without `npm run compile`, tests run against stale/missing code. - ---- - -# Manager-Specific Knowledge - -## Poetry - -- Check `POETRY_VIRTUALENVS_IN_PROJECT` env var -- Handle `{cache-dir}` placeholder in paths -- Platform-specific cache: Windows `%LOCALAPPDATA%\pypoetry\Cache`, macOS `~/Library/Caches/pypoetry` - -## PyEnv - -- Windows uses `pyenv-win` with different directory structure -- Use `path.resolve()` not `path.normalize()` for path comparisons - -## Conda - -- Fish shell uses different activation syntax -- Check Windows registry for installations - -## Pipenv - -- Check `WORKON_HOME` and `XDG_DATA_HOME` environment variables - -## Terminal Activation - -- Shell type detection for bash, zsh, fish, PowerShell, cmd -- `shellStartup` vs `command` activation patterns -- Terminal reveal timing for scripts with `input()` +# Quick Reference Commands + +| Task | Command | +| ----------------- | ------------------------------------------------------ | +| Unit tests | `npm run unittest` | +| Lint | `npm run lint` | +| Type check | `npm run compile-tests` | +| Build extension | `npm run compile` | +| Smoke tests | `npm run compile && npm run smoke-test` | +| Generate snapshot | `cd analysis && python snapshot.py -o ./snapshot.json` | +| Package VSIX | `npm run vsce-package` | diff --git a/.github/agents/reviewer.agent.md b/.github/agents/reviewer.agent.md index 9290682c..f4270c2f 100644 --- a/.github/agents/reviewer.agent.md +++ b/.github/agents/reviewer.agent.md @@ -37,6 +37,18 @@ Automated reviews consistently miss: - Localization gaps in user-facing messages - Accessibility regressions +## Related Skills + +For deep-dive patterns, these skills provide additional context: + +| Skill | Use When | +| ----------------------- | ------------------------------- | +| `/cross-platform-paths` | Reviewing path-related code | +| `/settings-precedence` | Reviewing settings code | +| `/manager-discovery` | Reviewing manager-specific code | + +The patterns below are the essential subset needed during reviews. 
+ --- ## Review Process diff --git a/.github/hooks/maintainer-hooks.json b/.github/hooks/maintainer-hooks.json new file mode 100644 index 00000000..32442229 --- /dev/null +++ b/.github/hooks/maintainer-hooks.json @@ -0,0 +1,50 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "description": "Agent hooks for vscode-python-environments maintainer workflow", + "hooks": { + "SessionStart": [ + { + "type": "command", + "command": "python .github/hooks/scripts/session_start.py", + "windows": "python .github\\hooks\\scripts\\session_start.py", + "timeout": 10, + "env": { + "PYTHONPATH": "." + } + } + ], + "PostToolUse": [ + { + "type": "command", + "command": "python .github/hooks/scripts/post_tool_use.py", + "windows": "python .github\\hooks\\scripts\\post_tool_use.py", + "timeout": 60, + "env": { + "PYTHONPATH": "." + } + } + ], + "Stop": [ + { + "type": "command", + "command": "python .github/hooks/scripts/stop_hook.py", + "windows": "python .github\\hooks\\scripts\\stop_hook.py", + "timeout": 15, + "env": { + "PYTHONPATH": "." + } + } + ], + "SubagentStop": [ + { + "type": "command", + "command": "python .github/hooks/scripts/subagent_stop.py", + "windows": "python .github\\hooks\\scripts\\subagent_stop.py", + "timeout": 10, + "env": { + "PYTHONPATH": "." + } + } + ] + } +} diff --git a/.github/hooks/scripts/README.md b/.github/hooks/scripts/README.md new file mode 100644 index 00000000..9318944d --- /dev/null +++ b/.github/hooks/scripts/README.md @@ -0,0 +1,51 @@ +# Copilot Agent Hooks Scripts + +This directory contains Python scripts used by agent hooks to automate workflow validation. + +## Scripts + +### session_start.py + +Runs at session start to inject project context: + +- Current git branch and commit +- Uncommitted changes count +- Open issues (if gh CLI available) +- Snapshot health summary (if available) +- Available skills reminder + +### post_tool_use.py + +Runs after file edit tools to provide immediate feedback: + +- Runs ESLint on changed TypeScript files +- Reports lint errors back to the model + +### stop_hook.py + +Runs before session ends to enforce workflow: + +- Checks for uncommitted TypeScript changes +- Reminds about pre-commit checks +- Blocks completion if staged changes aren't committed + +### subagent_stop.py + +Runs when subagents complete: + +- Currently a passthrough for logging +- Can be extended to validate reviewer output + +## Requirements + +These scripts use Python 3.10+ with no external dependencies (beyond what's already in the repo). + +They expect: + +- `git` CLI available +- `gh` CLI available (optional, for issue context) +- `npx` available for running ESLint + +## Hook Configuration + +See `.github/hooks/maintainer-hooks.json` for the hook configuration that loads these scripts. diff --git a/.github/hooks/scripts/post_tool_use.py b/.github/hooks/scripts/post_tool_use.py new file mode 100644 index 00000000..e56bf58f --- /dev/null +++ b/.github/hooks/scripts/post_tool_use.py @@ -0,0 +1,131 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""PostToolUse hook - Runs validation after file edits. 
+ +After editFiles tool: +- Runs ESLint on changed TypeScript files +- Reports lint errors back to the model as additional context +""" + +import json +import os +import subprocess +import sys +from pathlib import Path + +# Tools that modify files and should trigger validation +FILE_EDIT_TOOLS = {"editFiles", "createFile", "create_file", "replace_string_in_file"} + +# File patterns to validate +TYPESCRIPT_EXTENSIONS = {".ts", ".tsx"} + + +def run_eslint(files: list[str], cwd: Path) -> str | None: + """Run ESLint on specified files and return errors.""" + ts_files = [f for f in files if Path(f).suffix in TYPESCRIPT_EXTENSIONS] + if not ts_files: + return None + + try: + result = subprocess.run( + ["npx", "eslint", "--format", "compact", *ts_files], + cwd=cwd, + capture_output=True, + text=True, + timeout=30, + ) + if result.returncode != 0 and result.stdout: + # Parse compact format and summarize + lines = result.stdout.strip().split("\n") + error_count = sum( + 1 for line in lines if "Error" in line or "error" in line.lower() + ) + warning_count = sum( + 1 for line in lines if "Warning" in line or "warning" in line.lower() + ) + + if error_count > 0 or warning_count > 0: + summary = [] + if error_count > 0: + summary.append(f"{error_count} error(s)") + if warning_count > 0: + summary.append(f"{warning_count} warning(s)") + + # Include first few actual errors + sample_errors = [ + line + for line in lines[:5] + if "Error" in line or "error" in line.lower() + ] + + return f"ESLint: {', '.join(summary)}. " + " | ".join(sample_errors[:3]) + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + + return None + + +def extract_files_from_tool_input(tool_name: str, tool_input: dict) -> list[str]: + """Extract file paths from tool input based on tool type.""" + files = [] + + if tool_name in {"editFiles", "edit_files"}: + # editFiles has a 'files' array + if isinstance(tool_input, dict): + files.extend(tool_input.get("files", [])) + elif tool_name in {"createFile", "create_file"}: + # createFile has 'filePath' + if isinstance(tool_input, dict) and "filePath" in tool_input: + files.append(tool_input["filePath"]) + elif tool_name == "replace_string_in_file": + # replace_string_in_file has 'filePath' + if isinstance(tool_input, dict) and "filePath" in tool_input: + files.append(tool_input["filePath"]) + + return files + + +def main() -> int: + """Main entry point.""" + # Read input from stdin + try: + input_data = json.load(sys.stdin) + except json.JSONDecodeError: + input_data = {} + + tool_name = input_data.get("tool_name", "") + tool_input = input_data.get("tool_input", {}) + repo_root = Path(input_data.get("cwd", os.getcwd())) + + # Only process file edit tools + if tool_name not in FILE_EDIT_TOOLS: + print(json.dumps({})) + return 0 + + # Extract files that were edited + files = extract_files_from_tool_input(tool_name, tool_input) + if not files: + print(json.dumps({})) + return 0 + + # Run ESLint on TypeScript files + lint_result = run_eslint(files, repo_root) + + # Build response + if lint_result: + response = { + "hookSpecificOutput": { + "hookEventName": "PostToolUse", + "additionalContext": lint_result, + } + } + else: + response = {} + + print(json.dumps(response)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/hooks/scripts/session_start.py b/.github/hooks/scripts/session_start.py new file mode 100644 index 00000000..67a42f00 --- /dev/null +++ b/.github/hooks/scripts/session_start.py @@ -0,0 +1,178 @@ +# Copyright (c) Microsoft Corporation. 
All rights reserved. +# Licensed under the MIT License. + +"""SessionStart hook - Injects project context at session start. + +Provides the agent with: +- Current git branch and status +- Recent commits summary +- Open issue count (if gh CLI available) +- Snapshot summary (if available) +""" + +import json +import os +import subprocess +import sys +from pathlib import Path + + +def run_command(cmd: list[str], cwd: Path | None = None) -> str | None: + """Run a command and return stdout, or None on failure.""" + try: + result = subprocess.run( + cmd, + cwd=cwd, + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + return result.stdout.strip() + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + return None + + +def get_git_context(repo_root: Path) -> dict: + """Get current git context.""" + context = {} + + # Current branch + branch = run_command(["git", "rev-parse", "--abbrev-ref", "HEAD"], repo_root) + if branch: + context["branch"] = branch + + # Short SHA + sha = run_command(["git", "rev-parse", "--short", "HEAD"], repo_root) + if sha: + context["commit"] = sha + + # Uncommitted changes count + status = run_command(["git", "status", "--porcelain"], repo_root) + if status is not None: + changes = len([line for line in status.split("\n") if line.strip()]) + context["uncommitted_changes"] = changes + + # Recent commits (last 3) + log = run_command( + ["git", "log", "-3", "--oneline", "--no-decorate"], + repo_root, + ) + if log: + context["recent_commits"] = log.split("\n") + + return context + + +def get_issue_context(repo_root: Path) -> dict: + """Get open issue context if gh CLI is available.""" + context = {} + + # Check if gh CLI is available + issues_json = run_command( + [ + "gh", + "issue", + "list", + "--state", + "open", + "--limit", + "5", + "--json", + "number,title,labels", + ], + repo_root, + ) + if issues_json: + try: + issues = json.loads(issues_json) + context["open_issues_count"] = len(issues) + context["recent_issues"] = [ + f"#{i['number']}: {i['title']}" for i in issues[:3] + ] + except json.JSONDecodeError: + pass + + return context + + +def get_snapshot_summary(repo_root: Path) -> dict: + """Get snapshot summary if available.""" + snapshot_path = repo_root / "analysis" / "analysis-snapshot.json" + if not snapshot_path.exists(): + return {} + + try: + with open(snapshot_path, "r", encoding="utf-8") as f: + snapshot = json.load(f) + + summary = snapshot.get("summary", {}) + return { + "high_complexity_files": summary.get("high_complexity_files", 0), + "todo_count": summary.get("todo_count", 0), + "fixme_count": summary.get("fixme_count", 0), + "circular_dependencies": summary.get("circular_dependency_count", 0), + } + except (json.JSONDecodeError, OSError): + return {} + + +def main() -> int: + """Main entry point.""" + # Read input from stdin + try: + input_data = json.load(sys.stdin) + except json.JSONDecodeError: + input_data = {} + + repo_root = Path(input_data.get("cwd", os.getcwd())) + + # Gather context + git_context = get_git_context(repo_root) + issue_context = get_issue_context(repo_root) + snapshot_context = get_snapshot_summary(repo_root) + + # Build context message + parts = [] + + if git_context: + branch = git_context.get("branch", "unknown") + commit = git_context.get("commit", "") + changes = git_context.get("uncommitted_changes", 0) + parts.append(f"Git: {branch} @ {commit}") + if changes > 0: + parts.append(f"Uncommitted changes: {changes} files") + + if issue_context.get("recent_issues"): + 
parts.append(f"Open issues: {issue_context.get('open_issues_count', 0)}") + + if snapshot_context: + if snapshot_context.get("fixme_count", 0) > 0: + parts.append(f"FIXMEs: {snapshot_context['fixme_count']}") + if snapshot_context.get("circular_dependencies", 0) > 0: + parts.append(f"Circular deps: {snapshot_context['circular_dependencies']}") + + # Add reminder about skills + parts.append( + "Available skills: /generate-snapshot, /run-pre-commit-checks, " + "/cross-platform-paths, /settings-precedence, /manager-discovery" + ) + + # Output response + if parts: + response = { + "hookSpecificOutput": { + "hookEventName": "SessionStart", + "additionalContext": " | ".join(parts), + } + } + else: + response = {} + + print(json.dumps(response)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/hooks/scripts/stop_hook.py b/.github/hooks/scripts/stop_hook.py new file mode 100644 index 00000000..613fa009 --- /dev/null +++ b/.github/hooks/scripts/stop_hook.py @@ -0,0 +1,130 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""Stop hook - Ensures pre-commit checks were run before session ends. + +For maintainer sessions that modified files: +- Checks if there are uncommitted changes +- Verifies lint/type-check/tests passed or reminds to run them +""" + +import json +import os +import subprocess +import sys +from pathlib import Path + + +def run_command(cmd: list[str], cwd: Path | None = None) -> tuple[int, str]: + """Run a command and return (exit_code, output).""" + try: + result = subprocess.run( + cmd, + cwd=cwd, + capture_output=True, + text=True, + timeout=10, + ) + return result.returncode, result.stdout.strip() + except (subprocess.TimeoutExpired, FileNotFoundError) as e: + return 1, str(e) + + +def has_uncommitted_changes(repo_root: Path) -> bool: + """Check if there are uncommitted changes.""" + code, output = run_command(["git", "status", "--porcelain"], repo_root) + if code == 0 and output: + # Filter out untracked files in certain directories + lines = [ + line + for line in output.split("\n") + if line.strip() and not line.strip().startswith("??") # Ignore untracked + ] + return len(lines) > 0 + return False + + +def has_staged_changes(repo_root: Path) -> bool: + """Check if there are staged but uncommitted changes.""" + code, output = run_command(["git", "diff", "--cached", "--name-only"], repo_root) + return code == 0 and bool(output.strip()) + + +def check_ts_files_changed(repo_root: Path) -> bool: + """Check if any TypeScript files were changed.""" + code, output = run_command( + ["git", "diff", "--name-only", "HEAD"], + repo_root, + ) + if code == 0 and output: + return any(f.endswith((".ts", ".tsx")) for f in output.split("\n")) + + # Also check staged files + code, output = run_command( + ["git", "diff", "--cached", "--name-only"], + repo_root, + ) + if code == 0 and output: + return any(f.endswith((".ts", ".tsx")) for f in output.split("\n")) + + return False + + +def main() -> int: + """Main entry point.""" + # Read input from stdin + try: + input_data = json.load(sys.stdin) + except json.JSONDecodeError: + input_data = {} + + repo_root = Path(input_data.get("cwd", os.getcwd())) + + # Check if this is already a continuation from a previous stop hook + stop_hook_active = input_data.get("stop_hook_active", False) + if stop_hook_active: + # Don't block again to prevent infinite loop + print(json.dumps({})) + return 0 + + # Check for uncommitted TypeScript changes + if 
has_uncommitted_changes(repo_root) and check_ts_files_changed(repo_root): + # There are uncommitted TS changes - remind about pre-commit checks + response = { + "hookSpecificOutput": { + "hookEventName": "Stop", + "decision": "block", + "reason": ( + "You have uncommitted TypeScript changes. " + "Before finishing, run /run-pre-commit-checks skill " + "or manually run: npm run lint && npm run compile-tests && npm run unittest. " + "If checks pass and changes are ready, commit them. " + "If this session is just research/exploration, you can proceed without committing." + ), + } + } + print(json.dumps(response)) + return 0 + + # Check for staged but uncommitted changes + if has_staged_changes(repo_root): + response = { + "hookSpecificOutput": { + "hookEventName": "Stop", + "decision": "block", + "reason": ( + "You have staged changes that haven't been committed. " + "Either commit them with a proper message or unstage them before finishing." + ), + } + } + print(json.dumps(response)) + return 0 + + # All good + print(json.dumps({})) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/hooks/scripts/subagent_stop.py b/.github/hooks/scripts/subagent_stop.py new file mode 100644 index 00000000..80cb6e23 --- /dev/null +++ b/.github/hooks/scripts/subagent_stop.py @@ -0,0 +1,44 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +"""SubagentStop hook - Validates reviewer subagent completed checks. + +When the reviewer agent finishes: +- Logs completion for audit trail +- Could be extended to verify review was thorough +""" + +import json +import sys + + +def main() -> int: + """Main entry point.""" + # Read input from stdin + try: + input_data = json.load(sys.stdin) + except json.JSONDecodeError: + input_data = {} + + agent_type = input_data.get("agent_type", "") + agent_id = input_data.get("agent_id", "") + stop_hook_active = input_data.get("stop_hook_active", False) + + # Prevent infinite loops + if stop_hook_active: + print(json.dumps({})) + return 0 + + # Log reviewer completions (could be extended to write to audit file) + if agent_type.lower() == "reviewer": + # Currently just a passthrough - could add validation + # For example, check that reviewer output contains expected sections + pass + + # Allow subagent to complete + print(json.dumps({})) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/.github/skills/cross-platform-paths/SKILL.md b/.github/skills/cross-platform-paths/SKILL.md new file mode 100644 index 00000000..fd2c669d --- /dev/null +++ b/.github/skills/cross-platform-paths/SKILL.md @@ -0,0 +1,203 @@ +--- +name: cross-platform-paths +description: Critical patterns for cross-platform path handling in this VS Code extension. Windows vs POSIX path bugs are the #1 source of issues. Use this skill when reviewing or writing path-related code. +argument-hint: Review path handling in [file or component] +--- + +# Cross-Platform Path Handling + +**CRITICAL**: This extension runs on Windows, macOS, and Linux. Path bugs are the #1 source of issues. 
+ +## Core Rules + +### Rule 1: Never Concatenate Paths with `/` + +```typescript +// ❌ WRONG: POSIX-style path concatenation +const envPath = homeDir + '/.venv/bin/python'; + +// ✅ RIGHT: Use path.join() +const envPath = path.join(homeDir, '.venv', 'bin', 'python'); +``` + +### Rule 2: Use path.resolve() for Comparisons, Not path.normalize() + +```typescript +// ❌ WRONG: path.normalize keeps relative paths relative on Windows +const normalized = path.normalize(fsPath); +// path.normalize('\test') → '\test' (still relative!) + +// ✅ RIGHT: path.resolve adds drive letter on Windows +const normalized = path.resolve(fsPath); +// path.resolve('\test') → 'C:\test' (absolute!) + +// When comparing paths, use resolve() on BOTH sides: +const pathA = path.resolve(fsPath); +const pathB = path.resolve(e.environmentPath.fsPath); +return pathA === pathB; +``` + +### Rule 3: Use Uri.file().fsPath for VS Code Paths + +```typescript +// ❌ WRONG: Raw string comparison +if (filePath === otherPath) { +} + +// ✅ RIGHT: Compare fsPath to fsPath +import { Uri } from 'vscode'; +const fsPathA = Uri.file(pathA).fsPath; +const fsPathB = Uri.file(pathB).fsPath; +if (fsPathA === fsPathB) { +} +``` + +## Platform-Specific Gotchas + +### Windows + +| Issue | Details | +| ------------------ | -------------------------------------------- | +| Drive letters | Paths start with `C:\`, `D:\`, etc. | +| Backslashes | Separator is `\`, not `/` | +| Case insensitivity | `C:\Test` equals `c:\test` | +| Long paths | Paths >260 chars may fail | +| Mapped drives | `Z:\` may not be accessible | +| pyenv-win | Uses `pyenv.bat`, not `pyenv` or `pyenv.exe` | +| Poetry cache | `%LOCALAPPDATA%\pypoetry\Cache\virtualenvs` | +| UNC paths | `\\server\share\` format | + +### macOS + +| Issue | Details | +| ----------------- | ------------------------------------------- | +| Case sensitivity | Depends on filesystem (usually insensitive) | +| Homebrew symlinks | Complex symlink chains in `/opt/homebrew/` | +| Poetry cache | `~/Library/Caches/pypoetry/virtualenvs` | +| XCode Python | Different from Command Line Tools Python | + +### Linux + +| Issue | Details | +| ---------------- | --------------------------------------- | +| Case sensitivity | Paths ARE case-sensitive | +| /bin symlinks | `/bin` may be symlink to `/usr/bin` | +| XDG directories | `~/.local/share/virtualenvs` for pipenv | +| Poetry cache | `~/.cache/pypoetry/virtualenvs` | +| Hidden files | Dot-prefixed files are hidden | + +## Common Patterns + +### Getting Platform-Specific Paths + +```typescript +import * as os from 'os'; +import * as path from 'path'; + +// Home directory +const home = os.homedir(); // Works cross-platform + +// Construct paths correctly +const venvPath = path.join(home, '.venv', 'bin', 'python'); +// Windows: C:\Users\name\.venv\bin\python +// macOS: /Users/name/.venv/bin/python +// Linux: /home/name/.venv/bin/python +``` + +### Environment-Specific Executable Names + +```typescript +const isWindows = process.platform === 'win32'; + +// Python executable +const pythonExe = isWindows ? 'python.exe' : 'python'; + +// Activate script +const activateScript = isWindows + ? path.join(venvPath, 'Scripts', 'activate.bat') + : path.join(venvPath, 'bin', 'activate'); + +// pyenv command +const pyenvCmd = isWindows ? 
'pyenv.bat' : 'pyenv'; +``` + +### Normalizing Paths for Comparison + +```typescript +import { normalizePath } from './common/utils/pathUtils'; + +// Use normalizePath() for map keys and comparisons +const key = normalizePath(filePath); +cache.set(key, value); + +// But preserve original for user display +traceLog(`Discovered: ${filePath}`); // Keep original +``` + +### Handling Uri | string Union Types + +```typescript +// ❌ WRONG: Assuming Uri +function process(locator: Uri | string) { + const fsPath = locator.fsPath; // Crashes if string! +} + +// ✅ RIGHT: Handle both types +function process(locator: Uri | string) { + const fsPath = locator instanceof Uri ? locator.fsPath : locator; + + // Now normalize for comparisons + const normalized = path.resolve(fsPath); +} +``` + +## File Existence Checks + +```typescript +import * as fs from 'fs'; +import * as path from 'path'; + +// Check file exists (cross-platform) +const configPath = path.join(projectRoot, 'pyproject.toml'); +if (fs.existsSync(configPath)) { + // File exists +} + +// Use async version when possible +import { promises as fsPromises } from 'fs'; +try { + await fsPromises.access(configPath); + // File exists +} catch { + // File does not exist +} +``` + +## Shell Path Escaping + +```typescript +// ❌ WRONG: Unescaped paths in shell commands +terminal.sendText(`python ${filePath}`); +// D:\path\file.py becomes "D:pathfile.py" in some shells! + +// ✅ RIGHT: Quote paths +terminal.sendText(`python "${filePath}"`); + +// For Git Bash on Windows, escape backslashes +const shellPath = isGitBash ? filePath.replace(/\\/g, '/') : filePath; +``` + +## Testing Cross-Platform Code + +When testing path-related code: + +1. Test on Windows (cmd, PowerShell, Git Bash) +2. Test on macOS (zsh, bash) +3. Test on Linux (bash, fish) + +Pay special attention to: + +- Paths with spaces: `C:\Program Files\Python` +- Paths with Unicode: `~/проекты/` +- Very long paths (>260 chars on Windows) +- Paths with special characters: `$`, `&`, `(`, `)` diff --git a/.github/skills/generate-snapshot/SKILL.md b/.github/skills/generate-snapshot/SKILL.md new file mode 100644 index 00000000..34d61f7f --- /dev/null +++ b/.github/skills/generate-snapshot/SKILL.md @@ -0,0 +1,141 @@ +--- +name: generate-snapshot +description: Generate a codebase health snapshot for technical debt tracking and planning. Analyzes git history, code complexity, debt markers, and dependencies to identify hotspots and refactoring priorities. +argument-hint: '--output path --pretty' +--- + +# Generate Codebase Snapshot + +This skill generates a comprehensive code health snapshot using the analysis modules in `analysis/`. + +## When to Use + +- During planning phase to identify work items +- To find refactoring hotspots (high churn + high complexity) +- To track technical debt over time +- Before major releases to assess code health +- To identify knowledge silos (bus factor risks) + +## How to Generate + +```powershell +cd analysis +python snapshot.py --output ./analysis-snapshot.json +``` + +Add `--pretty` flag to also print the JSON to stdout. 
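+To triage the output programmatically instead of eyeballing the JSON, here is a minimal sketch — it assumes you have already generated the file as shown above, and the field names follow the structure documented in the next section:
+
+```python
+import json
+
+# Load a previously generated snapshot and print the top refactoring candidates.
+with open("analysis-snapshot.json", encoding="utf-8") as f:
+    snapshot = json.load(f)
+
+# priority_hotspots is already sorted by priority_score (change_count x max_complexity).
+for spot in snapshot["priority_hotspots"][:5]:
+    print(
+        f"{spot['path']}: {spot['change_count']} changes, "
+        f"max complexity {spot['max_complexity']}, score {spot['priority_score']}"
+    )
+```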
+ +## Snapshot Structure + +The snapshot contains these sections: + +### `summary` - High-level metrics dashboard + +- `files_with_changes`: Number of files with git changes +- `total_churn`: Total lines added + deleted +- `high_complexity_files`: Count of files with high complexity +- `todo_count`, `fixme_count`: Debt marker counts +- `circular_dependency_count`: Architectural issues +- `single_author_file_ratio`: Bus factor indicator + +### `priority_hotspots` - Top 20 refactoring candidates + +Files sorted by `priority_score = change_count × max_complexity` + +```json +{ + "path": "src/features/terminal/terminalManager.ts", + "change_count": 45, + "churn": 1200, + "max_complexity": 18, + "priority_score": 810 +} +``` + +High priority_score = frequently changed AND complex = prime refactoring target. + +### `git_analysis` - Change patterns + +- `hotspots`: Most frequently changed files +- `temporal_coupling`: Files that change together (hidden dependencies) +- `bus_factor`: Knowledge concentration risks + +### `complexity` - Code complexity metrics + +- `by_language.typescript`: TypeScript file metrics (max_complexity, avg_complexity, function_count) +- `high_complexity_functions`: Functions with cyclomatic complexity > 10 + +### `debt_indicators` - Technical debt markers + +- `debt_markers.by_type`: TODO, FIXME, HACK comments by type +- `large_files`: Files exceeding 500 lines of code +- `long_functions`: Functions exceeding 50 lines + +### `dependencies` - Module coupling analysis + +- `circular_dependencies`: Cycles in import graph +- `highly_coupled_modules`: Modules with fan-out > 10 +- `hub_modules`: Modules with fan-in > 10 +- `layer_violations`: Lower layers importing from higher layers + +## Interpreting Results + +### Priority Hotspots + +1. Sort by `priority_score` descending +2. Top items = files that change often AND are complex +3. These are prime candidates for: + - Breaking into smaller modules + - Adding tests before changes + - Simplifying complex functions + +### Temporal Coupling + +Files with `coupling_ratio > 0.8` changing together indicate: + +- Hidden dependencies not visible in imports +- Copy-paste code that should be shared +- Features spread across unrelated files + +### Knowledge Silos + +Files with `author_count = 1` and `change_count >= 3`: + +- Single point of failure for knowledge +- Consider documentation or pair programming +- Higher risk for bugs during that author's absence + +### Circular Dependencies + +Any cycles in the import graph: + +- Indicates tight coupling +- Makes testing difficult +- Consider introducing interfaces or restructuring + +## Example Usage in Planning + +``` +User: What should we work on next? + +Agent: Let me generate a snapshot and analyze it... +[generates snapshot] + +Based on the snapshot: + +Top 3 priority items: +1. **src/features/terminal/terminalManager.ts** (priority: 810) + - 45 changes, complexity 18 + - High churn indicates active development area + - Recommend: Split terminal concerns into separate modules + +2. **src/managers/common/nativePythonFinder.ts** (priority: 540) + - 30 changes, complexity 18 + - Multiple FIXME markers found + - Recommend: Address type guards and cache issues + +3. 
**src/features/interpreterSelection.ts** (priority: 360) + - 24 changes, complexity 15 + - Temporal coupling with settings files + - Recommend: Reduce coupling with settings module +``` diff --git a/.github/skills/manager-discovery/SKILL.md b/.github/skills/manager-discovery/SKILL.md new file mode 100644 index 00000000..a7f4256c --- /dev/null +++ b/.github/skills/manager-discovery/SKILL.md @@ -0,0 +1,329 @@ +--- +name: manager-discovery +description: Environment manager-specific discovery patterns and known issues. Use when working on or reviewing environment discovery code for conda, poetry, pipenv, pyenv, or venv. +argument-hint: 'manager name (e.g., poetry, conda, pyenv)' +user-invocable: true +--- + +# Environment Manager Discovery Patterns + +This skill documents manager-specific discovery patterns, environment variable precedence, and known issues. + +## Manager Quick Reference + +| Manager | Config Files | Cache Location | Key Env Vars | +| ------- | ---------------------------------------------- | ------------------------ | --------------------------------------------------- | +| Poetry | `poetry.toml`, `pyproject.toml`, `config.toml` | Platform-specific | `POETRY_VIRTUALENVS_IN_PROJECT`, `POETRY_CACHE_DIR` | +| Pipenv | `Pipfile`, `Pipfile.lock` | XDG or WORKON_HOME | `WORKON_HOME`, `XDG_DATA_HOME` | +| Pyenv | `.python-version`, `versions/` | `~/.pyenv/` or pyenv-win | `PYENV_ROOT`, `PYENV_VERSION` | +| Conda | `environment.yml`, `conda-meta/` | Registries + paths | `CONDA_PREFIX`, `CONDA_DEFAULT_ENV` | +| venv | `pyvenv.cfg` | In-project | None | + +--- + +## Poetry + +### Discovery Locations + +**Virtualenvs cache (default):** + +- Windows: `%LOCALAPPDATA%\pypoetry\Cache\virtualenvs` +- macOS: `~/Library/Caches/pypoetry/virtualenvs` +- Linux: `~/.cache/pypoetry/virtualenvs` + +**In-project (when enabled):** + +- `.venv/` in project root + +### Config Precedence (highest to lowest) + +1. Local config: `poetry.toml` in project root +2. Environment variables: `POETRY_VIRTUALENVS_*` +3. Global config: `~/.config/pypoetry/config.toml` + +### Known Issues + +| Issue | Description | Fix | +| ------------------------- | ----------------------------------------------- | -------------------------------- | +| `{cache-dir}` placeholder | Not resolved in paths from config | Resolve placeholder before use | +| Wrong default path | Windows/macOS differ from Linux | Use platform-specific defaults | +| In-project detection | `POETRY_VIRTUALENVS_IN_PROJECT` must be checked | Check env var first, then config | + +### Code Pattern + +```typescript +async function getPoetryVirtualenvsPath(): Promise { + // 1. Check environment variable first + const envVar = process.env.POETRY_VIRTUALENVS_PATH; + if (envVar) return envVar; + + // 2. Check local poetry.toml + const localConfig = await readPoetryToml(projectRoot); + if (localConfig?.virtualenvs?.path) { + return resolvePoetryPath(localConfig.virtualenvs.path); + } + + // 3. 
Use platform-specific default + return getDefaultPoetryCache(); +} + +function resolvePoetryPath(configPath: string): string { + // Handle {cache-dir} placeholder + if (configPath.includes('{cache-dir}')) { + const cacheDir = getDefaultPoetryCache(); + return configPath.replace('{cache-dir}', cacheDir); + } + return configPath; +} +``` + +--- + +## Pipenv + +### Discovery Locations + +**Default:** + +- Linux: `~/.local/share/virtualenvs/` (XDG_DATA_HOME) +- macOS: `~/.local/share/virtualenvs/` +- Windows: `~\.virtualenvs\` + +**When WORKON_HOME is set:** + +- Use `$WORKON_HOME/` directly + +### Environment Variables + +| Var | Purpose | +| ------------------------ | ---------------------------- | +| `WORKON_HOME` | Override virtualenv location | +| `XDG_DATA_HOME` | Base for Linux default | +| `PIPENV_VENV_IN_PROJECT` | Create `.venv/` in project | + +### Known Issues + +| Issue | Description | Fix | +| ----------------------------- | ------------------- | ---------------------------- | +| Missing WORKON_HOME support | Env var not checked | Read env var before defaults | +| Missing XDG_DATA_HOME support | Not used on Linux | Check XDG spec | + +### Code Pattern + +```typescript +function getPipenvVirtualenvsPath(): string { + // Check WORKON_HOME first + if (process.env.WORKON_HOME) { + return process.env.WORKON_HOME; + } + + // Check XDG_DATA_HOME on Linux + if (process.platform === 'linux') { + const xdgData = process.env.XDG_DATA_HOME || path.join(os.homedir(), '.local', 'share'); + return path.join(xdgData, 'virtualenvs'); + } + + // Windows/macOS defaults + return path.join(os.homedir(), '.virtualenvs'); +} +``` + +--- + +## PyEnv + +### Discovery Locations + +**Unix:** + +- `~/.pyenv/versions/` (default) +- `$PYENV_ROOT/versions/` (if PYENV_ROOT set) + +**Windows (pyenv-win):** + +- `%USERPROFILE%\.pyenv\pyenv-win\versions\` +- Different directory structure than Unix! + +### Key Differences: Unix vs Windows + +| Aspect | Unix | Windows (pyenv-win) | +| ------- | ----------------- | --------------------------------------- | +| Command | `pyenv` | `pyenv.bat` | +| Root | `~/.pyenv/` | `%USERPROFILE%\.pyenv\pyenv-win\` | +| Shims | `~/.pyenv/shims/` | `%USERPROFILE%\.pyenv\pyenv-win\shims\` | + +### Known Issues + +| Issue | Description | Fix | +| ---------------------------------- | ------------------------------------------ | ---------------------------------- | +| path.normalize() vs path.resolve() | Windows drive letter missing | Use `path.resolve()` on both sides | +| Wrong command on Windows | Looking for `pyenv` instead of `pyenv.bat` | Check for `.bat` extension | + +### Code Pattern + +```typescript +function getPyenvRoot(): string { + if (process.env.PYENV_ROOT) { + return process.env.PYENV_ROOT; + } + + if (process.platform === 'win32') { + // pyenv-win uses different structure + return path.join(os.homedir(), '.pyenv', 'pyenv-win'); + } + + return path.join(os.homedir(), '.pyenv'); +} + +function getPyenvVersionsPath(): string { + const root = getPyenvRoot(); + return path.join(root, 'versions'); +} + +// Use path.resolve() for comparisons! 
+function comparePyenvPaths(pathA: string, pathB: string): boolean { + return path.resolve(pathA) === path.resolve(pathB); +} +``` + +--- + +## Conda + +### Discovery Locations + +**Environment locations:** + +- Base install `envs/` directory +- `~/.conda/envs/` +- Paths in `~/.condarc` `envs_dirs` + +**Windows Registry:** + +- `HKCU\Software\Python\ContinuumAnalytics\` +- `HKLM\SOFTWARE\Python\ContinuumAnalytics\` + +### Shell Activation + +| Shell | Activation Command | +| ---------- | -------------------------------------- | +| bash, zsh | `source activate envname` | +| fish | `conda activate envname` (NOT source!) | +| PowerShell | `conda activate envname` | +| cmd | `activate.bat envname` | + +### Known Issues + +| Issue | Description | Fix | +| --------------------- | ----------------------- | -------------------------- | +| Fish shell activation | Uses bash-style command | Use fish-compatible syntax | +| Registry paths | May be stale/invalid | Verify paths exist | +| Base vs named envs | Different activation | Check if activating base | + +### Code Pattern + +```typescript +function getCondaActivationCommand(shell: ShellType, envName: string): string { + switch (shell) { + case 'fish': + // Fish uses different syntax! + return `conda activate ${envName}`; + case 'cmd': + return `activate.bat ${envName}`; + case 'powershell': + return `conda activate ${envName}`; + default: + // bash, zsh + return `source activate ${envName}`; + } +} +``` + +--- + +## venv + +### Discovery + +**Identification:** + +- Look for `pyvenv.cfg` file in directory +- Contains `home` and optionally `version` keys + +### Version Extraction Priority + +1. `version` field in `pyvenv.cfg` +2. Parse from `home` path (e.g., `Python311`) +3. Spawn Python executable (last resort) + +### Code Pattern + +```typescript +async function getVenvVersion(venvPath: string): Promise { + const cfgPath = path.join(venvPath, 'pyvenv.cfg'); + + try { + const content = await fs.readFile(cfgPath, 'utf-8'); + const lines = content.split('\n'); + + for (const line of lines) { + const [key, value] = line.split('=').map((s) => s.trim()); + if (key === 'version') { + return value; + } + } + + // Fall back to parsing home path + const homeLine = lines.find((l) => l.startsWith('home')); + if (homeLine) { + const home = homeLine.split('=')[1].trim(); + const match = home.match(/(\d+)\.(\d+)/); + if (match) { + return `${match[1]}.${match[2]}`; + } + } + } catch { + // Config file not found or unreadable + } + + return undefined; +} +``` + +--- + +## PET Server (Native Finder) + +### JSON-RPC Communication + +The PET server is a Rust-based locator that communicates via JSON-RPC over stdio. 
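+A minimal sketch of that exchange, written in Python for brevity — the binary invocation (`pet server`), the `resolve` method name, the params shape, and the newline-delimited framing are all illustrative assumptions here; the real client lives in the extension's native finder module:
+
+```python
+import json
+import subprocess
+
+# Spawn the locator and speak JSON-RPC over its stdin/stdout.
+proc = subprocess.Popen(
+    ["pet", "server"],  # assumed binary name and argument
+    stdin=subprocess.PIPE,
+    stdout=subprocess.PIPE,
+    text=True,
+)
+request = {
+    "jsonrpc": "2.0",
+    "id": 1,
+    "method": "resolve",  # assumed method name
+    "params": {"executable": "/usr/bin/python3"},
+}
+proc.stdin.write(json.dumps(request) + "\n")
+proc.stdin.flush()
+
+# The response comes back as a JSON message matched to the request id.
+print(proc.stdout.readline())
+```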
+ +### Known Issues + +| Issue | Description | Fix | +| ------------------- | -------------------------------- | ----------------------------- | +| No timeout | JSON-RPC can hang forever | Add Promise.race with timeout | +| Silent spawn errors | Extension continues without envs | Surface spawn errors to user | +| Resource leaks | Worker pool not cleaned up | Dispose on deactivation | +| Type guard missing | Response types not validated | Add runtime type checks | +| Cache key collision | Paths normalize to same key | Use consistent normalization | + +### Code Pattern + +```typescript +async function fetchFromPET(method: string, params: unknown): Promise { + const timeout = 30000; // 30 seconds + + const result = await Promise.race([ + this.client.request(method, params), + new Promise((_, reject) => setTimeout(() => reject(new Error('PET server timeout')), timeout)), + ]); + + // Validate response type + if (!isValidResponse(result)) { + throw new Error(`Invalid response from PET: ${JSON.stringify(result)}`); + } + + return result; +} +``` diff --git a/.github/skills/run-pre-commit-checks/SKILL.md b/.github/skills/run-pre-commit-checks/SKILL.md new file mode 100644 index 00000000..91aaae9b --- /dev/null +++ b/.github/skills/run-pre-commit-checks/SKILL.md @@ -0,0 +1,132 @@ +--- +name: run-pre-commit-checks +description: Run the mandatory pre-commit checks before committing code. Includes lint, type checking, and unit tests. MUST be run before every commit. +argument-hint: '--fix to auto-fix lint issues' +--- + +# Run Pre-Commit Checks + +This skill defines the mandatory checks that must pass before any commit. + +## When to Use + +- **ALWAYS** before committing code changes +- After fixing reviewer or Copilot review comments +- Before pushing changes +- When the maintainer agent requests pre-commit validation + +## Required Checks + +All three checks must pass before committing: + +### 1. Lint Check (Required) + +```powershell +npm run lint +``` + +**What it checks:** + +- ESLint rules defined in `eslint.config.mjs` +- TypeScript-specific linting rules +- Import ordering and unused imports +- Code style consistency + +**To auto-fix issues:** + +```powershell +npm run lint -- --fix +``` + +### 2. Type Check (Required) + +```powershell +npm run compile-tests +``` + +**What it checks:** + +- TypeScript type errors +- Missing imports +- Type mismatches +- Strict null checks + +**Output location:** `out/` directory (not used for production) + +### 3. 
Unit Tests (Required) + +```powershell +npm run unittest +``` + +**What it checks:** + +- All unit tests in `src/test/` pass +- Tests run with Mocha framework +- Uses configuration from `build/.mocha.unittests.json` + +## Full Pre-Commit Workflow + +```powershell +# Run all checks in sequence +npm run lint +npm run compile-tests +npm run unittest + +# If all pass, commit +git add -A +git commit -m "feat: your change description (Fixes #N)" +``` + +## Common Failures and Fixes + +### ESLint Errors + +| Error | Fix | +| ------------------------------------ | ------------------------------------------------------ | +| `@typescript-eslint/no-unused-vars` | Remove unused variable or prefix with `_` | +| `import/order` | Run `npm run lint -- --fix` | +| `@typescript-eslint/no-explicit-any` | Add proper type annotation | +| `no-console` | Use `traceLog`/`traceVerbose` instead of `console.log` | + +### Type Errors + +| Error | Fix | +| -------------------------------------- | --------------------------------------- | +| `TS2339: Property does not exist` | Check property name or add type guard | +| `TS2345: Argument type not assignable` | Check function parameter types | +| `TS2322: Type not assignable` | Add type assertion or fix type mismatch | +| `TS18048: possibly undefined` | Add null check or use optional chaining | + +### Test Failures + +1. Read the test failure message carefully +2. Check if you changed behavior that tests depend on +3. Update tests if behavior change is intentional +4. Fix code if behavior change is unintentional + +## Integration with Review Process + +The maintainer agent workflow requires: + +``` +Code Change → Reviewer Agent → Pre-Commit Checks → Commit + ↓ + Fix Issues → Re-run Reviewer → Pre-Commit Checks +``` + +**Never skip pre-commit checks.** They catch: + +- Type errors that would break the extension +- Style inconsistencies +- Regressions in existing functionality + +## Automation Note + +These checks should also be run: + +- By CI on every PR (automated) +- After addressing review comments (manual trigger) +- Before merging (automated by CI) + +The hooks system can automate running lint after file edits (see `.github/hooks/`). diff --git a/.github/skills/settings-precedence/SKILL.md b/.github/skills/settings-precedence/SKILL.md new file mode 100644 index 00000000..32355f55 --- /dev/null +++ b/.github/skills/settings-precedence/SKILL.md @@ -0,0 +1,260 @@ +--- +name: settings-precedence +description: VS Code settings precedence rules and common pitfalls. Essential for any code that reads or writes settings. Covers getConfiguration scope, inspect() vs get(), and multi-workspace handling. +argument-hint: Review settings handling in [file or component] +--- + +# VS Code Settings Precedence + +Settings precedence bugs corrupt user configurations. This skill documents the correct patterns. + +## Precedence Order (Highest to Lowest) + +1. **Workspace folder value** - Per-folder in multi-root workspace +2. **Workspace value** - `.vscode/settings.json` or `.code-workspace` +3. **User/global value** - User `settings.json` +4. **Default value** - From extension's `package.json` (⚠️ may come from other extensions!) + +## Core Rules + +### Rule 1: Always Pass Scope to getConfiguration() + +```typescript +// ❌ WRONG: Missing scope +const config = vscode.workspace.getConfiguration('python-envs'); +const value = config.get('pythonProjects'); +// workspaceFolderValue will be UNDEFINED because VS Code doesn't know which folder! 
+ +// ✅ RIGHT: Pass scope (workspace folder or document URI) +const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder); +const value = config.get('pythonProjects'); +``` + +**When to pass scope:** + +- Reading per-resource settings (`scope: "resource"` in package.json) +- Any multi-workspace scenario +- When you need `workspaceFolderValue` from inspect() + +### Rule 2: Use inspect() to Check Explicit Values + +```typescript +// ❌ WRONG: get() returns defaultValue even from other extensions! +const config = vscode.workspace.getConfiguration('python'); +if (config.get('useEnvironmentsExtension')) { + // May return true from another extension's package.json default! +} + +// ✅ RIGHT: Use inspect() and check explicit values only +const config = vscode.workspace.getConfiguration('python', scope); +const inspected = config.inspect('useEnvironmentsExtension'); + +const hasExplicitValue = + inspected?.globalValue !== undefined || + inspected?.workspaceValue !== undefined || + inspected?.workspaceFolderValue !== undefined; + +if (hasExplicitValue) { + // User explicitly set this value + const effectiveValue = inspected?.workspaceFolderValue ?? inspected?.workspaceValue ?? inspected?.globalValue; +} +``` + +### Rule 3: Don't Overwrite User's Explicit Values + +```typescript +// ❌ WRONG: Unconditionally writing to settings +await config.update('pythonPath', detectedPath, ConfigurationTarget.Workspace); +// Overwrites user's explicit choice! + +// ✅ RIGHT: Check for existing explicit values first +const inspected = config.inspect('pythonPath'); +const hasUserValue = inspected?.workspaceValue !== undefined; + +if (!hasUserValue) { + // Only set if user hasn't explicitly chosen + await config.update('pythonPath', detectedPath, ConfigurationTarget.Workspace); +} +``` + +### Rule 4: Update at the Correct Scope + +```typescript +// Configuration targets (least to most specific) +ConfigurationTarget.Global; // User settings.json +ConfigurationTarget.Workspace; // .vscode/settings.json or .code-workspace +ConfigurationTarget.WorkspaceFolder; // Per-folder in multi-root + +// To remove a setting, update with undefined +await config.update('pythonPath', undefined, ConfigurationTarget.Workspace); +``` + +## Multi-Root Workspace Handling + +### The `workspace` Property + +For multi-root workspaces, `pythonProjects` settings need a `workspace` property: + +```json +{ + "python-envs.pythonProjects": [ + { + "path": ".", + "workspace": "/path/to/workspace-folder", + "envManager": "ms-python.python:venv" + } + ] +} +``` + +Without the `workspace` property, settings get mixed up between folders. + +### Getting the Right Workspace Folder + +```typescript +// ❌ WRONG: Always using first workspace folder +const workspaceFolder = vscode.workspace.workspaceFolders?.[0]; + +// ✅ RIGHT: Get folder for specific document/file +const workspaceFolder = vscode.workspace.getWorkspaceFolder(documentUri) ?? vscode.workspace.workspaceFolders?.[0]; + +// When you have a path but not a URI +const uri = vscode.Uri.file(filePath); +const workspaceFolder = vscode.workspace.getWorkspaceFolder(uri); +``` + +## Common Issues + +### Issue: workspaceFolderValue is undefined + +**Cause:** Missing scope parameter in `getConfiguration()` + +```typescript +// This returns undefined for workspaceFolderValue! +const config = vscode.workspace.getConfiguration('python-envs'); +const inspected = config.inspect('pythonProjects'); +console.log(inspected?.workspaceFolderValue); // undefined! 
+ +// Fix: Pass scope +const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder.uri); +const inspected = config.inspect('pythonProjects'); +console.log(inspected?.workspaceFolderValue); // Now works! +``` + +### Issue: defaultValue from other extensions + +**Cause:** Using `get()` instead of `inspect()` for boolean checks + +The `defaultValue` in `inspect()` may come from ANY extension's `package.json`, not just yours: + +```typescript +// Another extension might have in their package.json: +// "python.useEnvironmentsExtension": { "default": true } + +// Your check will be wrong: +config.get('python.useEnvironmentsExtension') // true from other extension! + +// Fix: Only check explicit values +const inspected = config.inspect('python.useEnvironmentsExtension'); +if (inspected?.globalValue === true || ...) { } +``` + +### Issue: Settings overwritten on reload + +**Cause:** Not checking for existing values before writing + +```typescript +// During extension activation, this overwrites user's config! +await config.update('defaultEnvManager', 'venv', ConfigurationTarget.Global); + +// Fix: Only write defaults if no value exists +const current = config.inspect('defaultEnvManager'); +if (current?.globalValue === undefined && current?.workspaceValue === undefined) { + await config.update('defaultEnvManager', 'venv', ConfigurationTarget.Global); +} +``` + +### Issue: Settings mixed up in multi-root + +**Cause:** Not including workspace identifier in settings + +```typescript +// Without workspace identifier, can't tell which folder this belongs to +{ + "python-envs.pythonProjects": [ + { "path": ".", "envManager": "venv" } // Which workspace? + ] +} + +// Fix: Always include workspace when saving +const project = { + path: projectPath, + workspace: workspaceFolder.uri.fsPath, + envManager: selectedManager +}; +``` + +## Complete Example: Safe Settings Read/Write + +```typescript +import * as vscode from 'vscode'; + +async function getProjectConfig(projectUri: vscode.Uri): Promise { + const workspaceFolder = vscode.workspace.getWorkspaceFolder(projectUri); + if (!workspaceFolder) { + return undefined; + } + + // Always pass scope! + const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder.uri); + + // Use inspect() to understand where values come from + const inspected = config.inspect('pythonProjects'); + + // Prefer most specific value + const projects = inspected?.workspaceFolderValue ?? inspected?.workspaceValue ?? inspected?.globalValue ?? []; // Don't use defaultValue! + + // Find project matching the URI + return projects.find((p) => path.resolve(workspaceFolder.uri.fsPath, p.path) === projectUri.fsPath); +} + +async function saveProjectConfig(projectUri: vscode.Uri, projectConfig: ProjectConfig): Promise { + const workspaceFolder = vscode.workspace.getWorkspaceFolder(projectUri); + if (!workspaceFolder) { + return; + } + + const config = vscode.workspace.getConfiguration('python-envs', workspaceFolder.uri); + + const inspected = config.inspect('pythonProjects'); + + // Get existing projects (not including defaults!) + const existingProjects = inspected?.workspaceFolderValue ?? inspected?.workspaceValue ?? 
[]; + + // Ensure workspace property for multi-root + const configToSave: ProjectConfig = { + ...projectConfig, + workspace: workspaceFolder.uri.fsPath, + }; + + // Update or add + const projectIndex = existingProjects.findIndex( + (p) => path.resolve(workspaceFolder.uri.fsPath, p.path) === projectUri.fsPath, + ); + + const updatedProjects = [...existingProjects]; + if (projectIndex >= 0) { + updatedProjects[projectIndex] = configToSave; + } else { + updatedProjects.push(configToSave); + } + + // Write to workspace folder scope in multi-root + const target = + vscode.workspace.workspaceFolders?.length > 1 + ? vscode.ConfigurationTarget.WorkspaceFolder + : vscode.ConfigurationTarget.Workspace; + + await config.update('pythonProjects', updatedProjects, target); +} +``` From 93b7995f0e2e03ab32501d9d7804a603f8177c03 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 13:10:50 -0800 Subject: [PATCH 4/7] fix: address Copilot review feedback (PR #1232) - Fix Python 3.9 typing compatibility (use typing module) - Remove unused imports: defaultdict in dependency_analysis.py, Tuple in dependency_analysis.py - Remove unused variables: func_start, since_date, agent_id - Add explanatory comments for except pass blocks - Fix uv pip install command in code-analysis.yml - Update README.md to say Python 3.9+ instead of 3.10+ - Remove unused gitpython dependency from pyproject.toml - Rename manager-discovery skill to python-manager-discovery - Quote # symbol in cross-platform-paths description - Set user-invocable: false for reference skills - Remove / prefix from skill name references in agents/hooks --- .github/agents/maintainer.agent.md | 24 +++++++++---------- .github/agents/reviewer.agent.md | 10 ++++---- .github/hooks/scripts/README.md | 2 +- .github/hooks/scripts/post_tool_use.py | 7 ++++-- .github/hooks/scripts/session_start.py | 16 ++++++++----- .github/hooks/scripts/stop_hook.py | 5 ++-- .github/hooks/scripts/subagent_stop.py | 2 +- .github/skills/cross-platform-paths/SKILL.md | 3 ++- .../SKILL.md | 4 ++-- .github/skills/settings-precedence/SKILL.md | 1 + .github/workflows/code-analysis.yml | 2 +- analysis/complexity_analysis.py | 1 - analysis/dependency_analysis.py | 3 +-- analysis/git_analysis.py | 7 ------ analysis/pyproject.toml | 1 - 15 files changed, 44 insertions(+), 44 deletions(-) rename .github/skills/{manager-discovery => python-manager-discovery}/SKILL.md (99%) diff --git a/.github/agents/maintainer.agent.md b/.github/agents/maintainer.agent.md index 7ed54c54..7ce9bbae 100644 --- a/.github/agents/maintainer.agent.md +++ b/.github/agents/maintainer.agent.md @@ -67,13 +67,13 @@ tools: Load these skills on-demand for detailed knowledge: -| Skill | When to Use | -| ------------------------ | ------------------------------------------------- | -| `/generate-snapshot` | Generate codebase health snapshot for planning | -| `/run-pre-commit-checks` | Run mandatory checks before committing | -| `/cross-platform-paths` | Reviewing/writing path-related code | -| `/settings-precedence` | Reviewing/writing settings code | -| `/manager-discovery` | Working on specific manager (poetry, conda, etc.) 
| +| Skill | When to Use | +| -------------------------- | ------------------------------------------------- | +| `generate-snapshot` | Generate codebase health snapshot for planning | +| `run-pre-commit-checks` | Run mandatory checks before committing | +| `cross-platform-paths` | Reviewing/writing path-related code | +| `settings-precedence` | Reviewing/writing settings code | +| `python-manager-discovery` | Working on specific manager (poetry, conda, etc.) | --- @@ -103,7 +103,7 @@ Planning → Development → Review → Merge 1. **Gather context:** - Check open GitHub issues (`github/list_issues`, `github/search_issues`) - - Generate snapshot: use `/generate-snapshot` skill for details + - Generate snapshot: Use `generate-snapshot` skill for details - Check open PRs for related work 2. **Analyze and prioritize:** @@ -140,9 +140,9 @@ git checkout -b feature/issue-N # or bug/issue-N, chore/issue-N Follow guidelines from `.github/instructions/generic.instructions.md`: -- **Paths:** Use `/cross-platform-paths` skill for patterns -- **Settings:** Use `/settings-precedence` skill for patterns -- **Managers:** Use `/manager-discovery` skill for manager-specific knowledge +- **Paths:** Use `cross-platform-paths` skill for patterns +- **Settings:** Use `settings-precedence` skill for patterns +- **Managers:** Use `python-manager-discovery` skill for manager-specific knowledge - **Localization:** `l10n.t()` for user-facing messages - **Logging:** `traceLog`/`traceVerbose`, never `console.log` @@ -172,7 +172,7 @@ Run **Reviewer** agent (`.github/agents/reviewer.agent.md`) with: ## 5. Pre-Commit Checks (REQUIRED) -Use `/run-pre-commit-checks` skill for details. Quick reference: +Use `run-pre-commit-checks` skill for details. Quick reference: ```powershell npm run lint # ESLint diff --git a/.github/agents/reviewer.agent.md b/.github/agents/reviewer.agent.md index f4270c2f..adbe0440 100644 --- a/.github/agents/reviewer.agent.md +++ b/.github/agents/reviewer.agent.md @@ -41,11 +41,11 @@ Automated reviews consistently miss: For deep-dive patterns, these skills provide additional context: -| Skill | Use When | -| ----------------------- | ------------------------------- | -| `/cross-platform-paths` | Reviewing path-related code | -| `/settings-precedence` | Reviewing settings code | -| `/manager-discovery` | Reviewing manager-specific code | +| Skill | Use When | +| -------------------------- | ------------------------------- | +| `cross-platform-paths` | Reviewing path-related code | +| `settings-precedence` | Reviewing settings code | +| `python-manager-discovery` | Reviewing manager-specific code | The patterns below are the essential subset needed during reviews. diff --git a/.github/hooks/scripts/README.md b/.github/hooks/scripts/README.md index 9318944d..daefc7bc 100644 --- a/.github/hooks/scripts/README.md +++ b/.github/hooks/scripts/README.md @@ -38,7 +38,7 @@ Runs when subagents complete: ## Requirements -These scripts use Python 3.10+ with no external dependencies (beyond what's already in the repo). +These scripts use Python 3.9+ with no external dependencies (beyond what's already in the repo). 
They expect: diff --git a/.github/hooks/scripts/post_tool_use.py b/.github/hooks/scripts/post_tool_use.py index e56bf58f..57874969 100644 --- a/.github/hooks/scripts/post_tool_use.py +++ b/.github/hooks/scripts/post_tool_use.py @@ -13,6 +13,7 @@ import subprocess import sys from pathlib import Path +from typing import Dict, List, Optional # Tools that modify files and should trigger validation FILE_EDIT_TOOLS = {"editFiles", "createFile", "create_file", "replace_string_in_file"} @@ -21,7 +22,7 @@ TYPESCRIPT_EXTENSIONS = {".ts", ".tsx"} -def run_eslint(files: list[str], cwd: Path) -> str | None: +def run_eslint(files: List[str], cwd: Path) -> Optional[str]: """Run ESLint on specified files and return errors.""" ts_files = [f for f in files if Path(f).suffix in TYPESCRIPT_EXTENSIONS] if not ts_files: @@ -61,12 +62,14 @@ def run_eslint(files: list[str], cwd: Path) -> str | None: return f"ESLint: {', '.join(summary)}. " + " | ".join(sample_errors[:3]) except (subprocess.TimeoutExpired, FileNotFoundError): + # ESLint failures (timeout or missing binary) should not block the hook; + # treat them as "no lint results" and continue without reporting lint output. pass return None -def extract_files_from_tool_input(tool_name: str, tool_input: dict) -> list[str]: +def extract_files_from_tool_input(tool_name: str, tool_input: Dict) -> List[str]: """Extract file paths from tool input based on tool type.""" files = [] diff --git a/.github/hooks/scripts/session_start.py b/.github/hooks/scripts/session_start.py index 67a42f00..176aa5ca 100644 --- a/.github/hooks/scripts/session_start.py +++ b/.github/hooks/scripts/session_start.py @@ -15,9 +15,10 @@ import subprocess import sys from pathlib import Path +from typing import Dict, List, Optional -def run_command(cmd: list[str], cwd: Path | None = None) -> str | None: +def run_command(cmd: List[str], cwd: Optional[Path] = None) -> Optional[str]: """Run a command and return stdout, or None on failure.""" try: result = subprocess.run( @@ -30,11 +31,12 @@ def run_command(cmd: list[str], cwd: Path | None = None) -> str | None: if result.returncode == 0: return result.stdout.strip() except (subprocess.TimeoutExpired, FileNotFoundError): + # Git/gh CLI not available or timed out; return None to skip this context. pass return None -def get_git_context(repo_root: Path) -> dict: +def get_git_context(repo_root: Path) -> Dict: """Get current git context.""" context = {} @@ -65,7 +67,7 @@ def get_git_context(repo_root: Path) -> dict: return context -def get_issue_context(repo_root: Path) -> dict: +def get_issue_context(repo_root: Path) -> Dict: """Get open issue context if gh CLI is available.""" context = {} @@ -92,12 +94,13 @@ def get_issue_context(repo_root: Path) -> dict: f"#{i['number']}: {i['title']}" for i in issues[:3] ] except json.JSONDecodeError: + # Malformed JSON from gh CLI; skip issue context. pass return context -def get_snapshot_summary(repo_root: Path) -> dict: +def get_snapshot_summary(repo_root: Path) -> Dict: """Get snapshot summary if available.""" snapshot_path = repo_root / "analysis" / "analysis-snapshot.json" if not snapshot_path.exists(): @@ -115,6 +118,7 @@ def get_snapshot_summary(repo_root: Path) -> dict: "circular_dependencies": summary.get("circular_dependency_count", 0), } except (json.JSONDecodeError, OSError): + # Snapshot file unreadable or malformed; skip snapshot context. 
return {} @@ -155,8 +159,8 @@ def main() -> int: # Add reminder about skills parts.append( - "Available skills: /generate-snapshot, /run-pre-commit-checks, " - "/cross-platform-paths, /settings-precedence, /manager-discovery" + "Available skills: generate-snapshot, run-pre-commit-checks, " + "cross-platform-paths, settings-precedence, python-manager-discovery" ) # Output response diff --git a/.github/hooks/scripts/stop_hook.py b/.github/hooks/scripts/stop_hook.py index 613fa009..a67e44c4 100644 --- a/.github/hooks/scripts/stop_hook.py +++ b/.github/hooks/scripts/stop_hook.py @@ -13,9 +13,10 @@ import subprocess import sys from pathlib import Path +from typing import List, Optional, Tuple -def run_command(cmd: list[str], cwd: Path | None = None) -> tuple[int, str]: +def run_command(cmd: List[str], cwd: Optional[Path] = None) -> Tuple[int, str]: """Run a command and return (exit_code, output).""" try: result = subprocess.run( @@ -96,7 +97,7 @@ def main() -> int: "decision": "block", "reason": ( "You have uncommitted TypeScript changes. " - "Before finishing, run /run-pre-commit-checks skill " + "Before finishing, use the run-pre-commit-checks skill " "or manually run: npm run lint && npm run compile-tests && npm run unittest. " "If checks pass and changes are ready, commit them. " "If this session is just research/exploration, you can proceed without committing." diff --git a/.github/hooks/scripts/subagent_stop.py b/.github/hooks/scripts/subagent_stop.py index 80cb6e23..3d8512af 100644 --- a/.github/hooks/scripts/subagent_stop.py +++ b/.github/hooks/scripts/subagent_stop.py @@ -21,7 +21,7 @@ def main() -> int: input_data = {} agent_type = input_data.get("agent_type", "") - agent_id = input_data.get("agent_id", "") + # agent_id available in input_data if needed for logging stop_hook_active = input_data.get("stop_hook_active", False) # Prevent infinite loops diff --git a/.github/skills/cross-platform-paths/SKILL.md b/.github/skills/cross-platform-paths/SKILL.md index fd2c669d..1e4197a3 100644 --- a/.github/skills/cross-platform-paths/SKILL.md +++ b/.github/skills/cross-platform-paths/SKILL.md @@ -1,7 +1,8 @@ --- name: cross-platform-paths -description: Critical patterns for cross-platform path handling in this VS Code extension. Windows vs POSIX path bugs are the #1 source of issues. Use this skill when reviewing or writing path-related code. +description: 'Critical patterns for cross-platform path handling in this VS Code extension. Windows vs POSIX path bugs are the #1 source of issues. Use this skill when reviewing or writing path-related code.' argument-hint: Review path handling in [file or component] +user-invocable: false --- # Cross-Platform Path Handling diff --git a/.github/skills/manager-discovery/SKILL.md b/.github/skills/python-manager-discovery/SKILL.md similarity index 99% rename from .github/skills/manager-discovery/SKILL.md rename to .github/skills/python-manager-discovery/SKILL.md index a7f4256c..4d2b3555 100644 --- a/.github/skills/manager-discovery/SKILL.md +++ b/.github/skills/python-manager-discovery/SKILL.md @@ -1,8 +1,8 @@ --- -name: manager-discovery +name: python-manager-discovery description: Environment manager-specific discovery patterns and known issues. Use when working on or reviewing environment discovery code for conda, poetry, pipenv, pyenv, or venv. 
argument-hint: 'manager name (e.g., poetry, conda, pyenv)' -user-invocable: true +user-invocable: false --- # Environment Manager Discovery Patterns diff --git a/.github/skills/settings-precedence/SKILL.md b/.github/skills/settings-precedence/SKILL.md index 32355f55..22fcc31c 100644 --- a/.github/skills/settings-precedence/SKILL.md +++ b/.github/skills/settings-precedence/SKILL.md @@ -2,6 +2,7 @@ name: settings-precedence description: VS Code settings precedence rules and common pitfalls. Essential for any code that reads or writes settings. Covers getConfiguration scope, inspect() vs get(), and multi-workspace handling. argument-hint: Review settings handling in [file or component] +user-invocable: false --- # VS Code Settings Precedence diff --git a/.github/workflows/code-analysis.yml b/.github/workflows/code-analysis.yml index c74fea52..2692ea7f 100644 --- a/.github/workflows/code-analysis.yml +++ b/.github/workflows/code-analysis.yml @@ -30,7 +30,7 @@ jobs: - name: Install analysis dependencies working-directory: analysis run: | - uv pip install --system -r pyproject.toml + uv pip install --system . - name: Generate snapshot working-directory: analysis diff --git a/analysis/complexity_analysis.py b/analysis/complexity_analysis.py index 523c39d7..f204cdda 100644 --- a/analysis/complexity_analysis.py +++ b/analysis/complexity_analysis.py @@ -206,7 +206,6 @@ def analyze_typescript_file( start_line = content[: match.start()].count("\n") + 1 # Find function end (rough estimate - count braces) - func_start = match.end() func_end = len(content) if i + 1 < len(func_matches): diff --git a/analysis/dependency_analysis.py b/analysis/dependency_analysis.py index 90bb5b4c..a939cd71 100644 --- a/analysis/dependency_analysis.py +++ b/analysis/dependency_analysis.py @@ -12,9 +12,8 @@ import pathlib import re -from collections import defaultdict from dataclasses import dataclass, field -from typing import Dict, List, Optional, Set, Tuple +from typing import Dict, List, Optional, Set import pathspec diff --git a/analysis/git_analysis.py b/analysis/git_analysis.py index 8d5304e6..d0ffa1d2 100644 --- a/analysis/git_analysis.py +++ b/analysis/git_analysis.py @@ -115,13 +115,6 @@ def analyze_git_log(repo_root: pathlib.Path) -> Dict[str, FileStats]: file_stats: Dict[str, FileStats] = defaultdict(lambda: FileStats(path="")) # Get commits from last N days, limited to MAX_COMMITS - since_date = datetime.now(timezone.utc).replace( - hour=0, minute=0, second=0, microsecond=0 - ) - since_date = since_date.replace( - year=since_date.year - 1 if DAYS_OF_HISTORY >= 365 else since_date.year - ) - try: log_output = run_git_command( [ diff --git a/analysis/pyproject.toml b/analysis/pyproject.toml index 74ba285d..d2c284d7 100644 --- a/analysis/pyproject.toml +++ b/analysis/pyproject.toml @@ -4,7 +4,6 @@ version = "0.1.0" description = "Code health and technical debt analysis tools" requires-python = ">=3.9" dependencies = [ - "gitpython>=3.1.0", "radon>=6.0.0", "pathspec>=0.11.0", ] From 2d5ce13f770365d09e4625064bf11d651b33a61e Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 13:35:01 -0800 Subject: [PATCH 5/7] fix: address additional Copilot review feedback - Add timeout parameters to subprocess calls in snapshot.py and git_analysis.py - Fix debt marker regex pattern (remove literal pipe from character class) - Fix import pattern regex to avoid cross-statement matches - Use astral-sh/setup-uv@v4 action instead of curl | sh --- .github/workflows/code-analysis.yml | 4 +--- analysis/debt_indicators.py | 
6 +++--- analysis/dependency_analysis.py | 2 +- analysis/git_analysis.py | 1 + analysis/snapshot.py | 6 +++++- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/code-analysis.yml b/.github/workflows/code-analysis.yml index 2692ea7f..893fcd18 100644 --- a/.github/workflows/code-analysis.yml +++ b/.github/workflows/code-analysis.yml @@ -23,9 +23,7 @@ jobs: python-version: '3.12' - name: Install uv - run: | - curl -LsSf https://astral.sh/uv/install.sh | sh - echo "$HOME/.local/bin" >> $GITHUB_PATH + uses: astral-sh/setup-uv@v4 - name: Install analysis dependencies working-directory: analysis diff --git a/analysis/debt_indicators.py b/analysis/debt_indicators.py index dc9b3e98..1b169cca 100644 --- a/analysis/debt_indicators.py +++ b/analysis/debt_indicators.py @@ -69,13 +69,13 @@ def to_dict(self) -> dict: # Debt marker patterns DEBT_PATTERNS = [ - (r"#\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:|\s](.+)$", "python"), + (r"#\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:\s](.+)$", "python"), ( - r"//\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:|\s](.+)$", + r"//\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:\s](.+)$", "typescript", ), ( - r"/\*\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:|\s](.+?)\*/", + r"/\*\s*(TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW)[:\s](.+?)\*/", "typescript", ), ] diff --git a/analysis/dependency_analysis.py b/analysis/dependency_analysis.py index a939cd71..6568edbf 100644 --- a/analysis/dependency_analysis.py +++ b/analysis/dependency_analysis.py @@ -122,7 +122,7 @@ def extract_imports_typescript( # import './path' # const x = require('./path') import_patterns = [ - r'import\s+(?:.*?\s+from\s+)?[\'"]([^\'"]+)[\'"]', + r'import\s+(?:[^\'";]+?\s+from\s+)?[\'"]([^\'"]+)[\'"]', r'require\s*\(\s*[\'"]([^\'"]+)[\'"]\s*\)', r'import\s*\(\s*[\'"]([^\'"]+)[\'"]\s*\)', # dynamic import ] diff --git a/analysis/git_analysis.py b/analysis/git_analysis.py index d0ffa1d2..c9c623a2 100644 --- a/analysis/git_analysis.py +++ b/analysis/git_analysis.py @@ -92,6 +92,7 @@ def run_git_command(args: List[str], cwd: pathlib.Path) -> str: capture_output=True, text=True, check=True, + timeout=60, ) return result.stdout diff --git a/analysis/snapshot.py b/analysis/snapshot.py index 3f35c911..44695c15 100644 --- a/analysis/snapshot.py +++ b/analysis/snapshot.py @@ -34,6 +34,7 @@ def get_git_info(repo_root: pathlib.Path) -> dict: capture_output=True, text=True, check=True, + timeout=10, ).stdout.strip() short_sha = subprocess.run( @@ -42,6 +43,7 @@ def get_git_info(repo_root: pathlib.Path) -> dict: capture_output=True, text=True, check=True, + timeout=10, ).stdout.strip() branch = subprocess.run( @@ -50,6 +52,7 @@ def get_git_info(repo_root: pathlib.Path) -> dict: capture_output=True, text=True, check=True, + timeout=10, ).stdout.strip() # Get commit message @@ -59,6 +62,7 @@ def get_git_info(repo_root: pathlib.Path) -> dict: capture_output=True, text=True, check=True, + timeout=10, ).stdout.strip() return { @@ -67,7 +71,7 @@ def get_git_info(repo_root: pathlib.Path) -> dict: "branch": branch, "message": message[:200], # Truncate long messages } - except subprocess.CalledProcessError: + except (subprocess.CalledProcessError, subprocess.TimeoutExpired): return { "sha": "unknown", "short_sha": "unknown", From 2e5a275d34c33e24f5a003d1f7d125b035e750aa Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 16:45:36 -0800 Subject: [PATCH 6/7] fix: address more Copilot review feedback - Fix 
should_analyze_file() to use relative path parts instead of absolute - Add [build-system] table to pyproject.toml for uv pip install - Fix snapshot path in skill docs and session_start.py (use repo root) - Rename open_issues_count to recent_issues_count (reflects --limit 5) --- .github/hooks/scripts/session_start.py | 6 +++--- .github/skills/generate-snapshot/SKILL.md | 4 +++- analysis/complexity_analysis.py | 4 +++- analysis/debt_indicators.py | 4 +++- analysis/dependency_analysis.py | 4 +++- analysis/pyproject.toml | 4 ++++ 6 files changed, 19 insertions(+), 7 deletions(-) diff --git a/.github/hooks/scripts/session_start.py b/.github/hooks/scripts/session_start.py index 176aa5ca..ff96efea 100644 --- a/.github/hooks/scripts/session_start.py +++ b/.github/hooks/scripts/session_start.py @@ -89,7 +89,7 @@ def get_issue_context(repo_root: Path) -> Dict: if issues_json: try: issues = json.loads(issues_json) - context["open_issues_count"] = len(issues) + context["recent_issues_count"] = len(issues) context["recent_issues"] = [ f"#{i['number']}: {i['title']}" for i in issues[:3] ] @@ -102,7 +102,7 @@ def get_issue_context(repo_root: Path) -> Dict: def get_snapshot_summary(repo_root: Path) -> Dict: """Get snapshot summary if available.""" - snapshot_path = repo_root / "analysis" / "analysis-snapshot.json" + snapshot_path = repo_root / "analysis-snapshot.json" if not snapshot_path.exists(): return {} @@ -149,7 +149,7 @@ def main() -> int: parts.append(f"Uncommitted changes: {changes} files") if issue_context.get("recent_issues"): - parts.append(f"Open issues: {issue_context.get('open_issues_count', 0)}") + parts.append(f"Recent issues: {issue_context.get('recent_issues_count', 0)}") if snapshot_context: if snapshot_context.get("fixme_count", 0) > 0: diff --git a/.github/skills/generate-snapshot/SKILL.md b/.github/skills/generate-snapshot/SKILL.md index 34d61f7f..91e802ec 100644 --- a/.github/skills/generate-snapshot/SKILL.md +++ b/.github/skills/generate-snapshot/SKILL.md @@ -20,11 +20,13 @@ This skill generates a comprehensive code health snapshot using the analysis mod ```powershell cd analysis -python snapshot.py --output ./analysis-snapshot.json +python snapshot.py --output ../analysis-snapshot.json ``` Add `--pretty` flag to also print the JSON to stdout. +**Note:** The snapshot is written to the repository root (`analysis-snapshot.json`), not inside the `analysis/` folder. This path is ignored by `.gitignore`. 
+ ## Snapshot Structure The snapshot contains these sections: diff --git a/analysis/complexity_analysis.py b/analysis/complexity_analysis.py index f204cdda..87cde6a8 100644 --- a/analysis/complexity_analysis.py +++ b/analysis/complexity_analysis.py @@ -95,7 +95,9 @@ def should_analyze_file( ".git", ".vscode-test", } - for part in filepath.parts: + # Use relative path parts to avoid matching directories in repo root path + rel_parts = filepath.relative_to(repo_root).parts + for part in rel_parts: if part in skip_dirs: return False diff --git a/analysis/debt_indicators.py b/analysis/debt_indicators.py index 1b169cca..cff3b758 100644 --- a/analysis/debt_indicators.py +++ b/analysis/debt_indicators.py @@ -115,7 +115,9 @@ def should_analyze_file( ".git", ".vscode-test", } - for part in filepath.parts: + # Use relative path parts to avoid matching directories in repo root path + rel_parts = filepath.relative_to(repo_root).parts + for part in rel_parts: if part in skip_dirs: return False diff --git a/analysis/dependency_analysis.py b/analysis/dependency_analysis.py index 6568edbf..7af33670 100644 --- a/analysis/dependency_analysis.py +++ b/analysis/dependency_analysis.py @@ -89,7 +89,9 @@ def should_analyze_file( ".git", ".vscode-test", } - for part in filepath.parts: + # Use relative path parts to avoid matching directories in repo root path + rel_parts = filepath.relative_to(repo_root).parts + for part in rel_parts: if part in skip_dirs: return False diff --git a/analysis/pyproject.toml b/analysis/pyproject.toml index d2c284d7..f6249ef3 100644 --- a/analysis/pyproject.toml +++ b/analysis/pyproject.toml @@ -1,3 +1,7 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + [project] name = "vscode-python-environments-analysis" version = "0.1.0" From fc4b439624d5da2adbb762ebeda396ea6cbfc914 Mon Sep 17 00:00:00 2001 From: Karthik Nadig Date: Fri, 13 Feb 2026 17:01:19 -0800 Subject: [PATCH 7/7] fix: address more Copilot review feedback - Use relative imports in snapshot.py for proper package structure - Add traceback.print_exc() for better CI debugging on failures - Add test file patterns to _should_skip_file in git_analysis.py - Include .tsx, .js, .jsx in complexity analysis (not just .ts) - Update workflow to use 'python -m analysis.snapshot' invocation - Update skill docs with new execution method --- .github/skills/generate-snapshot/SKILL.md | 6 +++--- .github/workflows/code-analysis.yml | 3 +-- analysis/complexity_analysis.py | 4 ++-- analysis/git_analysis.py | 9 +++++++++ analysis/snapshot.py | 12 +++++++----- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/.github/skills/generate-snapshot/SKILL.md b/.github/skills/generate-snapshot/SKILL.md index 91e802ec..65d81a7b 100644 --- a/.github/skills/generate-snapshot/SKILL.md +++ b/.github/skills/generate-snapshot/SKILL.md @@ -19,13 +19,13 @@ This skill generates a comprehensive code health snapshot using the analysis mod ## How to Generate ```powershell -cd analysis -python snapshot.py --output ../analysis-snapshot.json +# From repository root +python -m analysis.snapshot --output analysis-snapshot.json ``` Add `--pretty` flag to also print the JSON to stdout. -**Note:** The snapshot is written to the repository root (`analysis-snapshot.json`), not inside the `analysis/` folder. This path is ignored by `.gitignore`. +**Note:** The snapshot is written to the repository root (`analysis-snapshot.json`). This path is ignored by `.gitignore`. 
## Snapshot Structure diff --git a/.github/workflows/code-analysis.yml b/.github/workflows/code-analysis.yml index 893fcd18..53b5ced6 100644 --- a/.github/workflows/code-analysis.yml +++ b/.github/workflows/code-analysis.yml @@ -31,9 +31,8 @@ jobs: uv pip install --system . - name: Generate snapshot - working-directory: analysis run: | - python snapshot.py --output ../analysis-snapshot.json + python -m analysis.snapshot --output analysis-snapshot.json - name: Upload snapshot artifact uses: actions/upload-artifact@v4 diff --git a/analysis/complexity_analysis.py b/analysis/complexity_analysis.py index 87cde6a8..5714a8f1 100644 --- a/analysis/complexity_analysis.py +++ b/analysis/complexity_analysis.py @@ -279,8 +279,8 @@ def analyze_complexity(repo_root: pathlib.Path) -> dict: if file_complexity: results["python"].append(file_complexity.to_dict()) - # Analyze TypeScript files - ts_files = find_source_files(repo_root, [".ts"]) + # Analyze TypeScript/JavaScript files + ts_files = find_source_files(repo_root, [".ts", ".tsx", ".js", ".jsx"]) for filepath in ts_files: file_complexity = analyze_typescript_file(filepath, repo_root) if file_complexity: diff --git a/analysis/git_analysis.py b/analysis/git_analysis.py index c9c623a2..5ed9efd2 100644 --- a/analysis/git_analysis.py +++ b/analysis/git_analysis.py @@ -207,6 +207,15 @@ def _should_skip_file(filepath: str) -> bool: ".git/", "package-lock.json", ".vsix", + # Skip test files and directories + "/test/", + "/tests/", + "/__tests__/", + ".test.", + ".spec.", + "_test.", + "_spec.", + "/mocks/", ] return any(pattern in filepath for pattern in skip_patterns) diff --git a/analysis/snapshot.py b/analysis/snapshot.py index 44695c15..be6f09a5 100644 --- a/analysis/snapshot.py +++ b/analysis/snapshot.py @@ -12,14 +12,15 @@ import pathlib import subprocess import sys +import traceback from datetime import datetime, timezone from typing import Optional -# Import analysis modules -from complexity_analysis import analyze_complexity -from debt_indicators import analyze_debt -from dependency_analysis import analyze_dependencies -from git_analysis import analyze_repository as analyze_git +# Import analysis modules (relative imports for package structure) +from .complexity_analysis import analyze_complexity +from .debt_indicators import analyze_debt +from .dependency_analysis import analyze_dependencies +from .git_analysis import analyze_repository as analyze_git # Snapshot schema version - increment when breaking changes are made SCHEMA_VERSION = "1.0.0" @@ -285,6 +286,7 @@ def main() -> int: return 0 except Exception as e: print(f"Error generating snapshot: {e}", file=sys.stderr) + traceback.print_exc() return 1
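
A note on the test-file skip patterns added above: `_should_skip_file` matches plain substrings, not globs. The following is a minimal TypeScript restatement of that check, illustrative only (the shipped implementation is the Python shown in the diff):

```typescript
// Substring-based skip check, mirroring _should_skip_file in analysis/git_analysis.py.
// These are raw substrings, not globs: "/test/" matches "src/test/utils.ts"
// but not "src/testUtils.ts".
const SKIP_PATTERNS = [
    "/test/", "/tests/", "/__tests__/",
    ".test.", ".spec.", "_test.", "_spec.",
    "/mocks/",
];

function shouldSkipFile(filepath: string): boolean {
    return SKIP_PATTERNS.some((pattern) => filepath.includes(pattern));
}

// shouldSkipFile("src/features/envCommands.test.ts") -> true
// shouldSkipFile("src/features/envCommands.ts")      -> false
```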