From 101670612ad04d49036517a5d3e848506862285d Mon Sep 17 00:00:00 2001 From: Suntion <149924916+SunYanbox@users.noreply.github.com> Date: Wed, 6 May 2026 18:34:38 +0800 Subject: [PATCH 1/5] =?UTF-8?q?feat(workspace):=20=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E6=8E=92=E9=99=A4=E4=B8=8E=E6=9D=83=E9=99=90=E7=AE=A1=E7=90=86?= =?UTF-8?q?=E6=9E=B6=E6=9E=84=EF=BC=8C=E7=BB=9F=E4=B8=80=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E7=AD=96=E7=95=A5=20-=20=E6=96=B0=E5=A2=9E=E5=8A=9F=E8=83=BD:?= =?UTF-8?q?=20=E5=BC=95=E5=85=A5=E7=BB=9F=E4=B8=80=E7=9A=84=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E6=8E=92=E9=99=A4=E4=B8=8E=E6=9D=83=E9=99=90=E5=86=B3?= =?UTF-8?q?=E7=AD=96=E5=BC=95=E6=93=8E=20=20=20*=20=E6=96=B0=E5=A2=9E=20`E?= =?UTF-8?q?xclusionManager`=20=E7=B1=BB=EF=BC=8C=E8=81=9A=E5=90=88?= =?UTF-8?q?=E9=BB=98=E8=AE=A4=E6=80=A7=E8=83=BD=E6=8E=92=E9=99=A4=E3=80=81?= =?UTF-8?q?.gitignore=20=E8=A7=84=E5=88=99=E5=8F=8A=E7=94=A8=E6=88=B7?= =?UTF-8?q?=E8=87=AA=E5=AE=9A=E4=B9=89=E5=BF=BD=E7=95=A5=E9=A1=B9=20=20=20?= =?UTF-8?q?*=20=E6=96=B0=E5=A2=9E=20`PermissionManager`=20=E7=B1=BB?= =?UTF-8?q?=EF=BC=8C=E6=8F=90=E4=BE=9B=E5=9F=BA=E4=BA=8E=E6=93=8D=E4=BD=9C?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=20(READ/WRITE/SEARCH)=20=E7=9A=84=E7=BB=9F?= =?UTF-8?q?=E4=B8=80=E6=9D=83=E9=99=90=E5=86=B3=E7=AD=96=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=20=20=20*=20=E6=96=B0=E5=A2=9E=20`SensitiveFileError`=20?= =?UTF-8?q?=E5=BC=82=E5=B8=B8=E7=B1=BB=EF=BC=8C=E7=94=A8=E4=BA=8E=E5=9C=A8?= =?UTF-8?q?=E8=B7=AF=E5=BE=84=E6=A0=A1=E9=AA=8C=E9=98=B6=E6=AE=B5=E6=8B=A6?= =?UTF-8?q?=E6=88=AA=E6=95=8F=E6=84=9F=E6=96=87=E4=BB=B6=E8=AE=BF=E9=97=AE?= =?UTF-8?q?=20=20=20*=20=E5=B7=A5=E5=85=B7=E5=B1=82=E9=9B=86=E6=88=90?= =?UTF-8?q?=EF=BC=9A`ls=5Ftool`,=20`glob=5Ftool`,=20`regex=5Fsearch=5Ftool?= =?UTF-8?q?`,=20`exact=5Fsearch=5Ftool`=20=E5=9D=87=E6=8E=A5=E5=85=A5=20`e?= =?UTF-8?q?xclusion=5Fmanager`=20=E8=BF=9B=E8=A1=8C=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=20-=20=E4=BF=AE=E5=A4=8D=E9=97=AE=E9=A2=98:?= =?UTF-8?q?=20=E5=A2=9E=E5=BC=BA=E6=95=8F=E6=84=9F=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E4=BF=9D=E6=8A=A4=E6=9C=BA=E5=88=B6=20=20=20*=20=E5=9C=A8=20`P?= =?UTF-8?q?athValidator`=20=E4=B8=AD=E5=A2=9E=E5=8A=A0=20`=5Fraise=5Fif=5F?= =?UTF-8?q?sensitive`=20=E9=80=BB=E8=BE=91=EF=BC=8C=E7=9B=B4=E6=8E=A5?= =?UTF-8?q?=E7=A6=81=E6=AD=A2=E8=AE=BF=E9=97=AE=20`.env`,=20`*.pem`,=20`id?= =?UTF-8?q?=5Frsa`=20=E7=AD=89=E6=95=8F=E6=84=9F=E6=96=87=E4=BB=B6=20=20?= =?UTF-8?q?=20*=20=E5=9C=A8=20`BaseTool`=20=E7=9A=84=E5=BC=82=E5=B8=B8?= =?UTF-8?q?=E5=A4=84=E7=90=86=E8=A3=85=E9=A5=B0=E5=99=A8=E4=B8=AD=E6=8D=95?= =?UTF-8?q?=E8=8E=B7=20`SensitiveFileError`=EF=BC=8C=E8=BF=94=E5=9B=9E?= =?UTF-8?q?=E6=98=8E=E7=A1=AE=E7=9A=84=E6=8B=92=E7=BB=9D=E8=AE=BF=E9=97=AE?= =?UTF-8?q?=E6=8F=90=E7=A4=BA=20=20=20*=20=E5=B0=86=E5=8E=9F=E6=9C=AC?= =?UTF-8?q?=E7=A1=AC=E7=BC=96=E7=A0=81=E7=9A=84=20`DEFAULT=5FEXCLUDED=5FDI?= =?UTF-8?q?RS`=20=E6=9B=BF=E6=8D=A2=E4=B8=BA=E5=8A=A8=E6=80=81=E5=8A=A0?= =?UTF-8?q?=E8=BD=BD=E7=9A=84=20`ExclusionManager`=20=E5=AE=9E=E4=BE=8B=20?= =?UTF-8?q?-=20=E9=87=8D=E6=9E=84=E4=BC=98=E5=8C=96:=20=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E6=90=9C=E7=B4=A2=E4=B8=8E=E9=81=8D=E5=8E=86=E9=80=BB=E8=BE=91?= =?UTF-8?q?=20=20=20*=20=E7=A7=BB=E9=99=A4=20`workspace.py`=20=E5=92=8C?= =?UTF-8?q?=E5=90=84=E7=B1=BB=20Search=20Tool=20=E4=B8=AD=E9=87=8D?= =?UTF-8?q?=E5=A4=8D=E7=9A=84=E6=AD=A3=E5=88=99=E7=BC=96=E8=AF=91=E4=B8=8E?= =?UTF-8?q?=E6=8E=92=E9=99=A4=E9=80=BB=E8=BE=91=20=20=20*=20=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=20`merge=5Fignore=5Fregexes`=20=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E5=90=88=E5=B9=B6=E9=BB=98=E8=AE=A4=E8=A7=84?= =?UTF-8?q?=E5=88=99=E4=B8=8E=E7=94=A8=E6=88=B7=E4=BC=A0=E5=85=A5=E7=9A=84?= =?UTF-8?q?=20ignore=20=E6=A8=A1=E5=BC=8F=20=20=20*=20=E5=88=A9=E7=94=A8?= =?UTF-8?q?=20`is=5Fignored=5Fby=5Fgitignore`=20=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E5=A4=84=E7=90=86=20.gitignore=20=E8=A7=84?= =?UTF-8?q?=E5=88=99=E7=9A=84=E5=8C=B9=E9=85=8D=E4=B8=8E=E5=90=A6=E5=AE=9A?= =?UTF-8?q?=E9=80=BB=E8=BE=91=20-=20=E6=96=87=E6=A1=A3=E6=9B=B4=E6=96=B0:?= =?UTF-8?q?=20=E8=A1=A5=E5=85=85=E6=A8=A1=E5=9D=97=E8=AE=BE=E8=AE=A1?= =?UTF-8?q?=E8=AF=B4=E6=98=8E=20=20=20*=20=E6=B7=BB=E5=8A=A0=20`ExclusionM?= =?UTF-8?q?anager`=20=E5=92=8C=20`PermissionManager`=20=E7=9A=84=E7=B1=BB?= =?UTF-8?q?=E7=BA=A7=E6=96=87=E6=A1=A3=E5=AD=97=E7=AC=A6=E4=B8=B2=EF=BC=8C?= =?UTF-8?q?=E6=98=8E=E7=A1=AE=E5=8C=BA=E5=88=86=E6=80=A7=E8=83=BD=E6=8E=92?= =?UTF-8?q?=E9=99=A4=E4=B8=8E=E5=AE=89=E5=85=A8=E6=8E=92=E9=99=A4=E5=9C=BA?= =?UTF-8?q?=E6=99=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/workspace/exclusion_manager.py | 219 +++++++++++++++++++++++ src/workspace/gitignore_loader.py | 167 +++++++++++++++++ src/workspace/path_validator.py | 36 +++- src/workspace/permissions.py | 126 +++++++++++++ src/workspace/tools/base_tool.py | 11 +- src/workspace/tools/exact_search_tool.py | 10 +- src/workspace/tools/glob_tool.py | 2 + src/workspace/tools/ls_tool.py | 2 + src/workspace/tools/regex_search_tool.py | 10 +- src/workspace/workspace.py | 26 ++- 10 files changed, 578 insertions(+), 31 deletions(-) create mode 100644 src/workspace/exclusion_manager.py create mode 100644 src/workspace/gitignore_loader.py create mode 100644 src/workspace/permissions.py diff --git a/src/workspace/exclusion_manager.py b/src/workspace/exclusion_manager.py new file mode 100644 index 0000000..2bd5b33 --- /dev/null +++ b/src/workspace/exclusion_manager.py @@ -0,0 +1,219 @@ +"""统一排除管理器 —— 合并 gitignore、用户 ignore、默认排除规则. + +区分两类排除: +- 性能排除: 缓存/构建产物等不影响安全的目录 +- 安全排除: 隐私/凭据文件等不应被 AI 访问的路径 +""" + +import os +import re +from pathlib import Path +from typing import ClassVar + +from src.workspace.gitignore_loader import is_ignored_by_gitignore, load_gitignore + + +class ExclusionManager: + """排除规则统一管理器. + + 聚合三类排除源: + 1. 默认排除(内置的缓存/构建/IDE 目录) + 2. .gitignore 规则(如项目中有 .gitignore 文件) + 3. 用户临时 ignore 参数 + + Args: + workspace_root: 工作区根目录 + """ + + # 性能排除 —— 缓存、构建产物、IDE 配置等 + PERFORMANCE_EXCLUSIONS: frozenset[str] = frozenset( + { + ".git", + "__pycache__", + "node_modules", + ".venv", + "venv", + "dist", + "build", + ".idea", + ".vscode", + ".ruff_cache", + ".pytest_cache", + ".mypy_cache", + ".hypothesis", + "htmlcov", + ".coverage", + "*.pyc", + "*.pyo", + ".eggs", + "*.egg-info", + ".tox", + ".nox", + ".svn", + ".hg", + ".bzr", + "target", # Rust build + ".next", # Next.js build + ".nuxt", # Nuxt build + ".output", # Nuxt output + } + ) + + # 安全排除 —— 敏感文件, AI 不应读取 + SECURITY_EXCLUSIONS: frozenset[str] = frozenset( + { + ".env", + ".env.*", + "*.pem", + "credentials.*", + "*.key", + "*.cert", + "id_rsa", + "id_ed25519", + "*.cred", + "*.secret", + "**/vault/**", + } + ) + + # 安全排除 —— 需要精确匹配的特定文件 + SENSITIVE_FILE_PATTERNS: ClassVar[list[str]] = [ + r"\.env$", + r"\.env\..+$", + r".*\.pem$", + r"credentials\..*$", + r".*\.key$", + r".*\.cert$", + r"id_rsa$", + r"id_ed25519$", + r".*\.cred$", + r".*\.secret$", + ] + + def __init__(self, workspace_root: str | Path): + self._workspace_root = Path(workspace_root).resolve() + # 从 .gitignore 加载 + self._raw_gitignore_patterns: list[str] = [] + self._gitignore_exclude_res: list[re.Pattern] = [] + self._gitignore_negate_res: list[re.Pattern] = [] + + self._reload_gitignore() + + # 编译敏感文件正则 + self._sensitive_file_res: list[re.Pattern] = [] + for pat in self.SENSITIVE_FILE_PATTERNS: + try: + self._sensitive_file_res.append(re.compile(pat)) + except re.error: + continue + + def _reload_gitignore(self) -> None: + """(重新)加载 .gitignore.""" + raw, exclude_res, negate_res = load_gitignore(self._workspace_root) + self._raw_gitignore_patterns = raw + self._gitignore_exclude_res = exclude_res + self._gitignore_negate_res = negate_res + + def _check_performance_exclusion(self, rel_path_str: str) -> bool: + """检查路径是否匹配性能排除规则(基于目录名).""" + # 将路径拆分为各层, 检查每层是否在排除集合中 + parts = rel_path_str.replace(os.sep, "/").split("/") + for part in parts: + # 检查部分匹配: "node_modules" 或通配匹配 + if part in self.PERFORMANCE_EXCLUSIONS: + return True + # 检查 *.xxx 模式 + for exclude in self.PERFORMANCE_EXCLUSIONS: + if exclude.startswith("*.") and part.endswith(exclude[1:]): + return True + return False + + def should_exclude_dir(self, dir_name: str) -> bool: + """检查目录名是否应该被排除(基于名称的快速检查). + + 用于 glob/ls 等基于目录名的过滤场景. + """ + return dir_name in self.PERFORMANCE_EXCLUSIONS + + def should_exclude_path(self, path: Path) -> bool: + """检查路径是否应被排除(全面检查). + + 依次检查: 默认排除目录名 → gitignore 规则 → 否定规则 + + Args: + path: 文件的绝对路径 + + Returns: + True 表示应排除 + """ + try: + rel_path = path.relative_to(self._workspace_root) + except ValueError: + # 在工作区外, 不在这里处理(由 PathValidator 处理) + return False + + rel_str = str(rel_path).replace(os.sep, "/") + + # 1. 性能排除: 检查所有父目录 + if self._check_performance_exclusion(rel_str): + return True + + # 2. gitignore 排除 + return is_ignored_by_gitignore(rel_str, self._gitignore_exclude_res, self._gitignore_negate_res) + + def is_sensitive_file(self, path: Path) -> bool: + """检查路径是否为敏感文件. + + Args: + path: 文件绝对路径 + + Returns: + True 表示是敏感文件 + """ + try: + rel_str = str(path.relative_to(self._workspace_root)).replace(os.sep, "/") + except ValueError: + return False + + return any(regex.search(rel_str) for regex in self._sensitive_file_res) + + def merge_ignore_regexes(self, user_ignore: list[str] | None = None) -> list[re.Pattern]: + """合并默认排除 + gitignore + 用户 ignore 为正则列表. + + 用于 search_content 等需要正则匹配排除的场景. + + Args: + user_ignore: 用户传入的忽略正则列表 + + Returns: + 编译后的正则列表 + """ + result: list[re.Pattern] = [] + + # 默认排除目录名 → 正则 + for excl in self.PERFORMANCE_EXCLUSIONS: + # 处理 *.pyc 类模式 + if excl.startswith("*."): + pat = excl[1:] # .pyc + result.append(re.compile(re.escape(pat) + "$")) + else: + # 匹配路径中的此目录名 + result.append(re.compile(r"(^|/)" + re.escape(excl) + r"(/|$)")) + + # gitignore 排除正则 + result.extend(self._gitignore_exclude_res) + + # 用户传入的 ignore 正则 + if user_ignore: + for ign in user_ignore: + try: + result.append(re.compile(ign)) + except re.error: + continue + + return result + + @property + def excluded_dir_names(self) -> set[str]: + """获取所有排除目录名集合(用于快速 in 检查).""" + return {d for d in self.PERFORMANCE_EXCLUSIONS if not d.startswith("*")} diff --git a/src/workspace/gitignore_loader.py b/src/workspace/gitignore_loader.py new file mode 100644 index 0000000..64c86e1 --- /dev/null +++ b/src/workspace/gitignore_loader.py @@ -0,0 +1,167 @@ +"""Parse .gitignore files and convert patterns to regex for exclusion matching.""" + +import os +import re +from pathlib import Path + + +def _convert_gitignore_to_regex(pattern: str) -> str | None: + """将 .gitignore 模式转换为正则表达式. + + Args: + pattern: .gitignore 模式(如 *.log, build/, /foo) + + Returns: + 对应的正则表达式字符串, 如果模式无效则返回 None + """ + # 保留原始模式用于锚定判断 + original = pattern + is_dir_only = pattern.endswith("/") + if is_dir_only: + pattern = pattern.rstrip("/") + + # 处理否定模式(仅用于判断是否为目录模式, 不处理逻辑) + if pattern.startswith("!"): + pattern = pattern[1:] + + # 转义正则特殊字符, 再处理 gitignore 通配符 + # 先处理 ** (多级通配符) + parts = [] + i = 0 + while i < len(pattern): + if pattern[i : i + 2] == "**": + parts.append(".*") + i += 2 + elif pattern[i] == "*": + # 单级通配符, 不匹配路径分隔符 + parts.append(r"[^/]*") + i += 1 + elif pattern[i] == "?": + parts.append(r"[^/]") + i += 1 + elif pattern[i] in ".+^${}()|[]\\": + parts.append("\\" + pattern[i]) + i += 1 + else: + parts.append(pattern[i]) + i += 1 + + regex_str = "".join(parts) + + # 锚定: / 开头表示从根目录匹配, 否则匹配任意路径 + if original.startswith("/"): + regex_str = "^" + regex_str[1:] # 去掉开头的 / + elif original.startswith("!"): + # 处理否定模式 - 保持锚定逻辑不变 + regex_str = "^" + regex_str[1:] if original[1:].startswith("/") else "(^|/)" + regex_str + else: + regex_str = "(^|/)" + regex_str + + if is_dir_only: + regex_str += "(/.*)?$" + else: + regex_str += "$" + + return regex_str + + +def parse_gitignore(gitignore_path: str | Path) -> list[str]: + """解析 .gitignore 文件, 返回非否定排除模式列表. + + Args: + gitignore_path: .gitignore 文件路径 + + Returns: + 排除模式列表(目录名/通配符等原始 gitignore 格式) + """ + patterns: list[str] = [] + gitignore_path = Path(gitignore_path) + + if not gitignore_path.exists(): + return patterns + + try: + text = gitignore_path.read_text(encoding="utf-8") + except Exception: + return patterns + + for line in text.splitlines(): + stripped = line.strip() + + # 跳过空行和注释 + if not stripped or stripped.startswith("#"): + continue + + # 保留否定模式供外部处理, 返回原始行 + patterns.append(stripped) + + return patterns + + +def compile_gitignore_patterns(patterns: list[str]) -> tuple[list[re.Pattern], list[re.Pattern]]: + """将 gitignore 模式编译为正则表达式. + + Args: + patterns: 原始 gitignore 模式列表 + + Returns: + (排除正则列表, 否定排除正则列表) 的元组 + """ + exclude_res: list[re.Pattern] = [] + negate_res: list[re.Pattern] = [] + + for pattern in patterns: + if pattern.startswith("!"): + # 否定模式: 取消排除 + negate_regex = _convert_gitignore_to_regex(pattern) + if negate_regex: + try: + negate_res.append(re.compile(negate_regex)) + except re.error: + continue + else: + regex = _convert_gitignore_to_regex(pattern) + if regex: + try: + exclude_res.append(re.compile(regex)) + except re.error: + continue + + return exclude_res, negate_res + + +def is_ignored_by_gitignore(path: str | Path, exclude_res: list[re.Pattern], negate_res: list[re.Pattern]) -> bool: + """检查路径是否被 .gitignore 规则忽略. + + Args: + path: 要检查的相对路径(字符串形式) + exclude_res: 排除正则列表 + negate_res: 否定排除正则列表 + + Returns: + 是否应该被忽略 + """ + path_str = str(path).replace(os.sep, "/") + + # 先检查否定模式(优先级更高) + for negate_re in negate_res: + if negate_re.search(path_str): + return False + + # 再检查排除模式 + return any(exclude_re.search(path_str) for exclude_re in exclude_res) + + +def load_gitignore(workspace_root: str | Path) -> tuple[list[str], list[re.Pattern], list[re.Pattern]]: + """从工作区根目录加载 .gitignore. + + Args: + workspace_root: 工作区根目录 + + Returns: + (原始模式列表, 排除正则列表, 否定排除正则列表) + """ + gitignore_path = Path(workspace_root) / ".gitignore" + raw_patterns = parse_gitignore(gitignore_path) + exclude_res, negate_res = compile_gitignore_patterns(raw_patterns) + return raw_patterns, exclude_res, negate_res diff --git a/src/workspace/path_validator.py b/src/workspace/path_validator.py index 2a12d7d..388df77 100644 --- a/src/workspace/path_validator.py +++ b/src/workspace/path_validator.py @@ -1,5 +1,7 @@ import os +import re from pathlib import Path +from typing import ClassVar class WorkspaceBoundaryError(Exception): @@ -14,6 +16,12 @@ class PathNotFoundError(Exception): pass +class SensitiveFileError(Exception): + """访问敏感文件时抛出""" + + pass + + class PathValidator: """工作区路径安全校验器,防止路径遍历和符号链接逃逸 @@ -21,12 +29,27 @@ class PathValidator: workspace_root: 工作区根目录,默认为当前目录 """ + # 敏感文件匹配模式 + SENSITIVE_FILE_PATTERNS: ClassVar[list[re.Pattern]] = [ + re.compile(r"\.env$"), + re.compile(r"\.env\..+$"), + re.compile(r".*\.pem$"), + re.compile(r"credentials\..*$"), + re.compile(r".*\.key$"), + re.compile(r".*\.cert$"), + re.compile(r"id_rsa$"), + re.compile(r"id_ed25519$"), + re.compile(r".*\.cred$"), + re.compile(r".*\.secret$"), + re.compile(r"\.ManualAid[/\\].*\.db$"), + ] + def __init__(self, workspace_root: str | Path = "."): """初始化路径验证器. Args: workspace_root: 工作区根目录路径,可以是字符串或 Path 对象 - 所有后续的路径验证都将以此目录为边界 + 所有后续的路径验证都将以此目录为基准 Raises: FileNotFoundError: 当 workspace_root 不存在时抛出 @@ -71,8 +94,19 @@ def resolve_path(self, target: str | Path) -> Path: if not str(resolved).startswith(str(self.root) + os.sep) and resolved != self.root: raise WorkspaceBoundaryError(f"路径越界: {target}") + # 敏感文件检查 + self._raise_if_sensitive(resolved, target) + return resolved + @classmethod + def _raise_if_sensitive(cls, resolved: Path, original_target: str | Path) -> None: + """检查路径是否匹配敏感文件模式.""" + rel_str = str(resolved).replace("\\", "/") + for pattern in cls.SENSITIVE_FILE_PATTERNS: + if pattern.search(rel_str): + raise SensitiveFileError(f"禁止访问敏感文件: {original_target}") + def create_file_with_parents(self, target: str | Path, content: str = "") -> Path: """在工作区内创建文件,自动创建所有不存在的父目录. diff --git a/src/workspace/permissions.py b/src/workspace/permissions.py new file mode 100644 index 0000000..a87b5ad --- /dev/null +++ b/src/workspace/permissions.py @@ -0,0 +1,126 @@ +"""统一权限决策引擎 —— 路径级细粒度权限控制. + +整合现有权限机制: +1. BaseTool 的 read_permission/write_permission 布尔属性 +2. PathValidator 的边界检查 +3. binary_detector 的文件类型检测 +4. 敏感文件保护(新增) +5. Git 工具的安全模型(白名单+拦截正则, 后续提取) +6. mtime 校验 +7. 审计审批层 + +提供统一的 "工具 X 能否对路径 Y 执行操作 Z" 查询接口. +""" + +from __future__ import annotations + +from enum import Enum, auto +from pathlib import Path + + +class Operation(Enum): + """权限操作类型.""" + + READ = auto() + WRITE = auto() + SEARCH = auto() + EXECUTE = auto() + DELETE = auto() + + +class Decision(Enum): + """权限决策结果.""" + + ALLOWED = "allowed" + DENIED = "denied" + + +class PermissionManager: + """统一权限决策引擎. + + 使用方式(从 Workspace 获取): + perm = workspace.permission_manager + if perm.is_allowed("read_tool", path, Operation.READ): + ... + + Args: + workspace_root: 工作区根目录 + """ + + def __init__(self, workspace_root: Path): + self._root = workspace_root + + # 敏感文件正则列表(与 ExclusionManager 保持一致) + self._sensitive_patterns: list[str] = [ + r"\.env$", + r"\.env\..+$", + r".*\.pem$", + r"credentials\..*$", + r".*\.key$", + r".*\.cert$", + r"id_rsa$", + r"id_ed25519$", + r".*\.cred$", + r".*\.secret$", + ] + + # 操作 → 所需权限级别映射 + self._operation_permissions: dict[Operation, str] = { + Operation.READ: "read", + Operation.WRITE: "write", + Operation.SEARCH: "read", + Operation.EXECUTE: "write", + Operation.DELETE: "write", + } + + def _is_sensitive_path(self, path: Path) -> bool: + """检查路径是否匹配敏感文件模式.""" + import re + + try: + rel_str = str(path.relative_to(self._root)).replace("\\", "/") + except ValueError: + return True # 工作区外的路径视为敏感 + + return any(re.search(pattern, rel_str) for pattern in self._sensitive_patterns) + + def check(self, tool_name: str, path: Path, operation: Operation) -> Decision: + """检查工具能否对路径执行操作. + + 决策流程: + 1. 如果路径在工作区外 → DENIED + 2. 如果是敏感文件且操作非 SEARCH → DENIED + 3. 如果是二进制文件且操作是 READ/WRITE → 特殊处理(记录而非禁止) + 4. 否则 → ALLOWED + + Args: + tool_name: 工具名称(如 "read_tool", "write_tool") + path: 目标路径 + operation: 操作类型 + + Returns: + 权限决策结果 + """ + # 1. 工作区边界(双重保障, PathValidator 已做) + try: + path.relative_to(self._root) + except ValueError: + return Decision.DENIED + + # 2. 敏感文件保护(禁止 READ/WRITE/EXECUTE/DELETE) + if operation in ( + Operation.READ, + Operation.WRITE, + Operation.EXECUTE, + Operation.DELETE, + ) and self._is_sensitive_path(path): + return Decision.DENIED + + # 3. 二进制文件: 允许但标记 (记录由调用方处理) + # 这里不做禁止, 仅在 query 中返回信息 + + return Decision.ALLOWED + + def is_allowed(self, tool_name: str, path: Path, operation: Operation) -> bool: + """快捷方法: 是否允许操作.""" + return self.check(tool_name, path, operation) == Decision.ALLOWED diff --git a/src/workspace/tools/base_tool.py b/src/workspace/tools/base_tool.py index 70e6ab4..e958559 100644 --- a/src/workspace/tools/base_tool.py +++ b/src/workspace/tools/base_tool.py @@ -244,7 +244,7 @@ def handle_tool_exceptions(func) -> Callable[..., ToolResult]: """工具方法异常处理装饰器 —— 将异常转换为 ToolResult 失败结果""" from functools import wraps - from src.workspace.path_validator import PathNotFoundError, WorkspaceBoundaryError + from src.workspace.path_validator import PathNotFoundError, SensitiveFileError, WorkspaceBoundaryError @wraps(func) def wrapper(self, *args, **kwargs): @@ -269,13 +269,20 @@ def wrapper(self, *args, **kwargs): func_kwargs=kwargs, error=f"{err2.__class__.__name__}: {err2}", ) - except PermissionError as err3: + except SensitiveFileError as err3: return ToolResult( success=False, func_name=func.__name__, func_kwargs=kwargs, error=f"{err3.__class__.__name__}: {err3}", ) + except PermissionError as err4: + return ToolResult( + success=False, + func_name=func.__name__, + func_kwargs=kwargs, + error=f"{err4.__class__.__name__}: {err4}", + ) except Exception as err: return ToolResult( success=False, func_name=func.__name__, func_kwargs=kwargs, error=f"{err.__class__.__name__}: {err}" diff --git a/src/workspace/tools/exact_search_tool.py b/src/workspace/tools/exact_search_tool.py index 873df3a..3e11fd1 100644 --- a/src/workspace/tools/exact_search_tool.py +++ b/src/workspace/tools/exact_search_tool.py @@ -1,4 +1,3 @@ -import contextlib import re from pathlib import Path @@ -86,6 +85,7 @@ def __init__(self, workspace: Workspace): "limit": "最大匹配数量限制", "ignore": "忽略匹配正则的文件或文件夹列表", } + self._exclusion_manager = workspace.exclusion_manager @BaseTool.handle_tool_exceptions def exact_search( @@ -107,12 +107,8 @@ def exact_search( # 准备搜索字符串 search_string = pattern if case_sensitive else pattern.lower() - # 收集忽略模式 - ignore_patterns = [] - if ignore: - for ignore_pattern in ignore: - with contextlib.suppress(re.error): - ignore_patterns.append(re.compile(ignore_pattern)) + # 收集忽略模式: 合并默认排除 + 用户传入的 ignore + ignore_patterns = self._exclusion_manager.merge_ignore_regexes(ignore) # 搜索结果 results = [] diff --git a/src/workspace/tools/glob_tool.py b/src/workspace/tools/glob_tool.py index df158c0..f47274a 100644 --- a/src/workspace/tools/glob_tool.py +++ b/src/workspace/tools/glob_tool.py @@ -15,6 +15,7 @@ def __init__(self, workspace: Workspace): "path": "目录路径", "max_ret": "最多返回多少条检索结果", } + self._exclusion_manager = workspace.exclusion_manager @BaseTool.handle_tool_exceptions def glob(self, pattern: str, path: str = ".", max_ret: int = 1000) -> ToolResult: @@ -30,5 +31,6 @@ def glob(self, pattern: str, path: str = ".", max_ret: int = 1000) -> ToolResult data=[ f"{'[Folder]' if item.is_dir() else '[File]'} {item.relative_to(self.workspace.root_path)}" for item in islice(root_path.glob(pattern), max_ret) + if not self._exclusion_manager.should_exclude_path(item) ], ) diff --git a/src/workspace/tools/ls_tool.py b/src/workspace/tools/ls_tool.py index 33f035d..cbb176a 100644 --- a/src/workspace/tools/ls_tool.py +++ b/src/workspace/tools/ls_tool.py @@ -13,6 +13,7 @@ def __init__(self, workspace: Workspace): self.param_descriptions = { "path": "目录路径", } + self._exclusion_manager = workspace.exclusion_manager @BaseTool.handle_tool_exceptions def ls(self, path: str = ".") -> ToolResult: @@ -27,5 +28,6 @@ def ls(self, path: str = ".") -> ToolResult: data=[ f"{'[Folder]' if item.is_dir() else '[File]'} {item.relative_to(self.workspace.root_path)}" for item in folder_path.iterdir() + if not self._exclusion_manager.should_exclude_path(item) ], ) diff --git a/src/workspace/tools/regex_search_tool.py b/src/workspace/tools/regex_search_tool.py index a8b6549..b96cd57 100644 --- a/src/workspace/tools/regex_search_tool.py +++ b/src/workspace/tools/regex_search_tool.py @@ -1,4 +1,3 @@ -import contextlib import re from pathlib import Path @@ -111,6 +110,7 @@ def __init__(self, workspace: Workspace): "limit": "最大匹配数量限制", "ignore": "忽略匹配正则的文件或文件夹列表", } + self._exclusion_manager = workspace.exclusion_manager @BaseTool.handle_tool_exceptions def regex_search( @@ -134,12 +134,8 @@ def regex_search( except re.error as e: return self.make_failed_response(kwargs=locals().copy(), error=f"无效的正则表达式: {e}") - # 收集忽略模式 - ignore_patterns = [] - if ignore: - for ignore_pattern in ignore: - with contextlib.suppress(re.error): - ignore_patterns.append(re.compile(ignore_pattern)) + # 收集忽略模式: 合并默认排除 + 用户传入的 ignore + ignore_patterns = self._exclusion_manager.merge_ignore_regexes(ignore) # 搜索结果 results = [] diff --git a/src/workspace/workspace.py b/src/workspace/workspace.py index 56699f6..fa9e960 100644 --- a/src/workspace/workspace.py +++ b/src/workspace/workspace.py @@ -5,10 +5,9 @@ from pathlib import Path from src.models.tool_error_response import ToolErrorResponse +from src.workspace.exclusion_manager import ExclusionManager from src.workspace.path_validator import PathNotFoundError, PathValidator, WorkspaceBoundaryError - -# 默认排除的目录 后续改为从项目配置加载 -DEFAULT_EXCLUDED_DIRS = {".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build", ".idea", ".vscode"} +from src.workspace.permissions import PermissionManager def _highlight_matches(line: str, regex: re.Pattern) -> str: @@ -47,6 +46,8 @@ def __init__(self, path: str): return self.root_path = Path(path).resolve() self.path_validator: PathValidator = PathValidator(self.root_path) + self.exclusion_manager: ExclusionManager = ExclusionManager(self.root_path) + self.permission_manager: PermissionManager = PermissionManager(self.root_path) self.is_git_repo: bool = (self.root_path / ".git").is_dir() self.platform: str = sys.platform self.date: str = date.today().strftime("%y-%m-%d") @@ -84,8 +85,11 @@ def search_content( try: path = self.path_validator.validate(folder_path) - # 初始化排除目录集合 - exclude_set = set(exclude_dirs or DEFAULT_EXCLUDED_DIRS) + # 初始化排除目录集合: 合并默认排除 + 用户传入排除 + if exclude_dirs is not None: + exclude_set = set(exclude_dirs) | self.exclusion_manager.excluded_dir_names + else: + exclude_set = self.exclusion_manager.excluded_dir_names # 编译正则表达式 flags = 0 if case_sensitive else re.IGNORECASE @@ -218,14 +222,8 @@ def search_content_multi_pattern( try: path = self.path_validator.validate(folder_path) - # 预编译 ignore 正则 - ignore_res: list[re.Pattern] = [] - if ignore: - for ign in ignore: - try: - ignore_res.append(re.compile(ign)) - except re.error: - continue + # 预编译 ignore 正则: 合并默认排除 + 用户传入的 ignore + ignore_res: list[re.Pattern] = self.exclusion_manager.merge_ignore_regexes(ignore) # 收集文件(一次遍历) files_to_search: list[Path] = [] @@ -234,7 +232,7 @@ def search_content_multi_pattern( else: for file_path in path.rglob(file_pattern): if file_path.is_file(): - if any(p.name in DEFAULT_EXCLUDED_DIRS for p in file_path.parents): + if any(self.exclusion_manager.should_exclude_dir(p.name) for p in file_path.parents): continue rel = str(file_path.relative_to(self.root_path)) if any(ir.search(rel) for ir in ignore_res): From 3b034db38a5216c5b63134575cb200329f2cbddb Mon Sep 17 00:00:00 2001 From: Suntion <149924916+SunYanbox@users.noreply.github.com> Date: Wed, 6 May 2026 20:15:36 +0800 Subject: [PATCH 2/5] =?UTF-8?q?refactor(workspace):=20=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E6=9D=83=E9=99=90=E4=B8=8E=E5=AE=89=E5=85=A8=E6=A0=A1=E9=AA=8C?= =?UTF-8?q?=E6=9E=B6=E6=9E=84=EF=BC=8C=E7=BB=9F=E4=B8=80=E6=95=8F=E6=84=9F?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E8=A7=84=E5=88=99=E6=9D=A5=E6=BA=90=20-=20?= =?UTF-8?q?=E7=A0=B4=E5=9D=8F=E6=80=A7=E5=8F=98=E6=9B=B4:=20=E7=A7=BB?= =?UTF-8?q?=E9=99=A4=20`PermissionManager`=20=E5=8F=8A=E5=85=B6=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=20API=20=20=20*=20=E5=88=A0=E9=99=A4=20`src/workspace?= =?UTF-8?q?/permissions.py`=20=E6=96=87=E4=BB=B6=20=20=20*=20=E7=A7=BB?= =?UTF-8?q?=E9=99=A4=20`Workspace`=20=E7=B1=BB=E4=B8=AD=E7=9A=84=20`self.p?= =?UTF-8?q?ermission=5Fmanager`=20=E5=B1=9E=E6=80=A7=E5=8F=8A=E5=88=9D?= =?UTF-8?q?=E5=A7=8B=E5=8C=96=E9=80=BB=E8=BE=91=20=20=20*=20=E8=B0=83?= =?UTF-8?q?=E7=94=A8=E6=96=B9=E9=9C=80=E7=9B=B4=E6=8E=A5=E4=BE=9D=E8=B5=96?= =?UTF-8?q?=20`PathValidator`=20=E8=BF=9B=E8=A1=8C=E6=95=8F=E6=84=9F?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=8B=A6=E6=88=AA=EF=BC=8C=E4=B8=8D=E5=86=8D?= =?UTF-8?q?=E9=80=9A=E8=BF=87=20`workspace.permission=5Fmanager.is=5Fallow?= =?UTF-8?q?ed()`=20=E6=9F=A5=E8=AF=A2=20-=20=E9=87=8D=E6=9E=84=E4=BC=98?= =?UTF-8?q?=E5=8C=96:=20=E6=95=B4=E5=90=88=E6=95=8F=E6=84=9F=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E8=A7=84=E5=88=99=E8=87=B3=20`ExclusionManager`=20=20?= =?UTF-8?q?=20*=20=E5=B0=86=20`SENSITIVE=5FFILE=5FPATTERNS`=20=E4=BB=8E?= =?UTF-8?q?=E7=A1=AC=E7=BC=96=E7=A0=81=E7=A7=BB=E8=87=B3=20`ExclusionManag?= =?UTF-8?q?er.SENSITIVE=5FFILE=5FPATTERNS`=20=20=20*=20`PathValidator`=20?= =?UTF-8?q?=E5=BC=95=E5=85=A5=20`ExclusionManager`=20=E4=BD=9C=E4=B8=BA?= =?UTF-8?q?=E5=94=AF=E4=B8=80=E6=95=8F=E6=84=9F=E6=96=87=E4=BB=B6=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=E6=9D=A5=E6=BA=90=20(`re.compile(p)=20for=20p=20in=20?= =?UTF-8?q?ExclusionManager.SENSITIVE=5FFILE=5FPATTERNS`)=20=20=20*=20?= =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=AD=A3=E5=88=99=E8=A1=A8=E8=BE=BE=E5=BC=8F?= =?UTF-8?q?=E5=89=8D=E7=BC=80=E4=BB=A5=E6=94=AF=E6=8C=81=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=E8=BE=B9=E7=95=8C=E5=8C=B9=E9=85=8D=20(=E6=B7=BB=E5=8A=A0=20`(?= =?UTF-8?q?^|/)`)=20-=20=E4=BB=A3=E7=A0=81=E6=B8=85=E7=90=86:=20=E7=A7=BB?= =?UTF-8?q?=E9=99=A4=E5=86=97=E4=BD=99=E7=9A=84=E6=8E=92=E9=99=A4=E6=A3=80?= =?UTF-8?q?=E6=9F=A5=E9=80=BB=E8=BE=91=20=20=20*=20=E5=9C=A8=20`Workspace.?= =?UTF-8?q?=5Fsearch=5Fcontent`=20=E4=B8=AD=E7=A7=BB=E9=99=A4=E5=9F=BA?= =?UTF-8?q?=E4=BA=8E=20`should=5Fexclude=5Fdir`=20=E7=9A=84=E7=88=B6?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E9=81=8D=E5=8E=86=E6=A3=80=E6=9F=A5=20=20=20?= =?UTF-8?q?*=20=E7=A7=BB=E9=99=A4=20`ExclusionManager`=20=E4=B8=AD?= =?UTF-8?q?=E5=B7=B2=E5=BA=9F=E5=BC=83=E7=9A=84=20`=5Fsensitive=5Ffile=5Fr?= =?UTF-8?q?es`=20=E9=A2=84=E7=BC=96=E8=AF=91=E7=BC=93=E5=AD=98=E9=80=BB?= =?UTF-8?q?=E8=BE=91=20=20=20*=20=E7=A7=BB=E9=99=A4=20`ExclusionManager`?= =?UTF-8?q?=20=E4=B8=AD=E4=B8=8D=E5=86=8D=E4=BD=BF=E7=94=A8=E7=9A=84=20`is?= =?UTF-8?q?=5Fsensitive=5Ffile`=20=E5=92=8C=20`should=5Fexclude=5Fdir`=20?= =?UTF-8?q?=E6=96=B9=E6=B3=95=20-=20=E6=96=87=E6=A1=A3=E6=9B=B4=E6=96=B0:?= =?UTF-8?q?=20=E8=A1=A5=E5=85=85=20`gitignore=5Floader`=20=E5=B7=B2?= =?UTF-8?q?=E7=9F=A5=E5=B1=80=E9=99=90=E6=80=A7=E8=AF=B4=E6=98=8E=20=20=20?= =?UTF-8?q?*=20=E6=98=8E=E7=A1=AE=E6=A0=87=E6=B3=A8=E4=B8=8D=E6=94=AF?= =?UTF-8?q?=E6=8C=81=E5=B5=8C=E5=A5=97=20`.gitignore`=E3=80=81=E8=A1=8C?= =?UTF-8?q?=E5=B0=BE=E7=BB=AD=E8=A1=8C=E5=8F=8A=E5=AD=97=E7=AC=A6=E7=B1=BB?= =?UTF-8?q?=E6=89=A9=E5=B1=95=E8=AF=AD=E6=B3=95=20=20=20*=20=E8=AE=B0?= =?UTF-8?q?=E5=BD=95=E5=90=A6=E5=AE=9A=E6=A8=A1=E5=BC=8F=E4=BC=98=E5=85=88?= =?UTF-8?q?=E7=BA=A7=E5=A4=84=E7=90=86=E4=B8=8E=E7=9C=9F=E5=AE=9E=20Git=20?= =?UTF-8?q?=E7=9A=84=E5=B7=AE=E5=BC=82=E5=8F=8A=E8=AE=BE=E8=AE=A1=E7=90=86?= =?UTF-8?q?=E7=94=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/workspace/exclusion_manager.py | 78 +++++------------- src/workspace/gitignore_loader.py | 13 ++- src/workspace/path_validator.py | 20 ++--- src/workspace/permissions.py | 126 ----------------------------- src/workspace/workspace.py | 4 - 5 files changed, 36 insertions(+), 205 deletions(-) delete mode 100644 src/workspace/permissions.py diff --git a/src/workspace/exclusion_manager.py b/src/workspace/exclusion_manager.py index 2bd5b33..7e8bbe3 100644 --- a/src/workspace/exclusion_manager.py +++ b/src/workspace/exclusion_manager.py @@ -5,6 +5,8 @@ - 安全排除: 隐私/凭据文件等不应被 AI 访问的路径 """ +from __future__ import annotations + import os import re from pathlib import Path @@ -14,7 +16,7 @@ class ExclusionManager: - """排除规则统一管理器. + """排除规则统一管理器 聚合三类排除源: 1. 默认排除(内置的缓存/构建/IDE 目录) @@ -59,35 +61,20 @@ class ExclusionManager: } ) - # 安全排除 —— 敏感文件, AI 不应读取 - SECURITY_EXCLUSIONS: frozenset[str] = frozenset( - { - ".env", - ".env.*", - "*.pem", - "credentials.*", - "*.key", - "*.cert", - "id_rsa", - "id_ed25519", - "*.cred", - "*.secret", - "**/vault/**", - } - ) - - # 安全排除 —— 需要精确匹配的特定文件 + # 安全排除 —— 需要精确匹配的敏感文件正则(与 SECURITY_EXCLUSIONS 合并后的唯一来源) + # 注意: (^|/) 前缀表示匹配路径开始或目录分隔符后; .* 前缀表示匹配任意位置的文件扩展名 SENSITIVE_FILE_PATTERNS: ClassVar[list[str]] = [ - r"\.env$", - r"\.env\..+$", + r"(^|/)\.env$", + r"(^|/)\.env\..+$", r".*\.pem$", - r"credentials\..*$", + r"(^|/)credentials\..*$", r".*\.key$", r".*\.cert$", - r"id_rsa$", - r"id_ed25519$", + r"(^|/)id_rsa$", + r"(^|/)id_ed25519$", r".*\.cred$", r".*\.secret$", + r"(^|/)\.ManualAid[/\\].*\.db$", ] def __init__(self, workspace_root: str | Path): @@ -99,14 +86,6 @@ def __init__(self, workspace_root: str | Path): self._reload_gitignore() - # 编译敏感文件正则 - self._sensitive_file_res: list[re.Pattern] = [] - for pat in self.SENSITIVE_FILE_PATTERNS: - try: - self._sensitive_file_res.append(re.compile(pat)) - except re.error: - continue - def _reload_gitignore(self) -> None: """(重新)加载 .gitignore.""" raw, exclude_res, negate_res = load_gitignore(self._workspace_root) @@ -115,7 +94,7 @@ def _reload_gitignore(self) -> None: self._gitignore_negate_res = negate_res def _check_performance_exclusion(self, rel_path_str: str) -> bool: - """检查路径是否匹配性能排除规则(基于目录名).""" + """检查路径是否匹配性能排除规则(基于目录名)""" # 将路径拆分为各层, 检查每层是否在排除集合中 parts = rel_path_str.replace(os.sep, "/").split("/") for part in parts: @@ -128,15 +107,8 @@ def _check_performance_exclusion(self, rel_path_str: str) -> bool: return True return False - def should_exclude_dir(self, dir_name: str) -> bool: - """检查目录名是否应该被排除(基于名称的快速检查). - - 用于 glob/ls 等基于目录名的过滤场景. - """ - return dir_name in self.PERFORMANCE_EXCLUSIONS - def should_exclude_path(self, path: Path) -> bool: - """检查路径是否应被排除(全面检查). + """检查路径是否应被排除(全面检查) 依次检查: 默认排除目录名 → gitignore 规则 → 否定规则 @@ -161,26 +133,12 @@ def should_exclude_path(self, path: Path) -> bool: # 2. gitignore 排除 return is_ignored_by_gitignore(rel_str, self._gitignore_exclude_res, self._gitignore_negate_res) - def is_sensitive_file(self, path: Path) -> bool: - """检查路径是否为敏感文件. - - Args: - path: 文件绝对路径 - - Returns: - True 表示是敏感文件 - """ - try: - rel_str = str(path.relative_to(self._workspace_root)).replace(os.sep, "/") - except ValueError: - return False - - return any(regex.search(rel_str) for regex in self._sensitive_file_res) - def merge_ignore_regexes(self, user_ignore: list[str] | None = None) -> list[re.Pattern]: - """合并默认排除 + gitignore + 用户 ignore 为正则列表. + """合并默认排除 + gitignore + 用户 ignore 为正则列表 + + 用于 search_content 等需要正则匹配排除的场景 - 用于 search_content 等需要正则匹配排除的场景. + 敏感文件由 PathValidator 在写入/读取时拦截,搜索场景不额外过滤 Args: user_ignore: 用户传入的忽略正则列表 @@ -215,5 +173,5 @@ def merge_ignore_regexes(self, user_ignore: list[str] | None = None) -> list[re. @property def excluded_dir_names(self) -> set[str]: - """获取所有排除目录名集合(用于快速 in 检查).""" + """获取所有排除目录名集合(用于快速 in 检查)""" return {d for d in self.PERFORMANCE_EXCLUSIONS if not d.startswith("*")} diff --git a/src/workspace/gitignore_loader.py b/src/workspace/gitignore_loader.py index 64c86e1..d1a136e 100644 --- a/src/workspace/gitignore_loader.py +++ b/src/workspace/gitignore_loader.py @@ -1,4 +1,15 @@ -"""Parse .gitignore files and convert patterns to regex for exclusion matching.""" +"""Parse .gitignore files and convert patterns to regex for exclusion matching. + +已知局限性: +- 不支持嵌套 .gitignore(仅读取根目录下的 .gitignore) +- 不支持行尾 \\ 续行 +- 不支持 gitignore 扩展语法中的字符类(如 [abc] 会被错误转义) +- 否定模式的优先级处理与真实 Git 不一致: 当前实现将所有否定模式提升为最高优先级, + 而真实 Git 按行号顺序逐条处理(后出现的规则覆盖先出现的). + 当前行为对于 AI 工具场景偏安全(宁可少排除), 故保留此简化实现. +""" + +from __future__ import annotations import os import re diff --git a/src/workspace/path_validator.py b/src/workspace/path_validator.py index 388df77..99a4bee 100644 --- a/src/workspace/path_validator.py +++ b/src/workspace/path_validator.py @@ -3,6 +3,8 @@ from pathlib import Path from typing import ClassVar +from src.workspace.exclusion_manager import ExclusionManager + class WorkspaceBoundaryError(Exception): """访问工作区外的路径时抛出""" @@ -29,19 +31,9 @@ class PathValidator: workspace_root: 工作区根目录,默认为当前目录 """ - # 敏感文件匹配模式 + # 敏感文件匹配模式(从 ExclusionManager 统一来源引用) SENSITIVE_FILE_PATTERNS: ClassVar[list[re.Pattern]] = [ - re.compile(r"\.env$"), - re.compile(r"\.env\..+$"), - re.compile(r".*\.pem$"), - re.compile(r"credentials\..*$"), - re.compile(r".*\.key$"), - re.compile(r".*\.cert$"), - re.compile(r"id_rsa$"), - re.compile(r"id_ed25519$"), - re.compile(r".*\.cred$"), - re.compile(r".*\.secret$"), - re.compile(r"\.ManualAid[/\\].*\.db$"), + re.compile(p) for p in ExclusionManager.SENSITIVE_FILE_PATTERNS ] def __init__(self, workspace_root: str | Path = "."): @@ -102,9 +94,9 @@ def resolve_path(self, target: str | Path) -> Path: @classmethod def _raise_if_sensitive(cls, resolved: Path, original_target: str | Path) -> None: """检查路径是否匹配敏感文件模式.""" - rel_str = str(resolved).replace("\\", "/") + resolved_str = str(resolved).replace(os.sep, "/") for pattern in cls.SENSITIVE_FILE_PATTERNS: - if pattern.search(rel_str): + if pattern.search(resolved_str): raise SensitiveFileError(f"禁止访问敏感文件: {original_target}") def create_file_with_parents(self, target: str | Path, content: str = "") -> Path: diff --git a/src/workspace/permissions.py b/src/workspace/permissions.py deleted file mode 100644 index a87b5ad..0000000 --- a/src/workspace/permissions.py +++ /dev/null @@ -1,126 +0,0 @@ -"""统一权限决策引擎 —— 路径级细粒度权限控制. - -整合现有权限机制: -1. BaseTool 的 read_permission/write_permission 布尔属性 -2. PathValidator 的边界检查 -3. binary_detector 的文件类型检测 -4. 敏感文件保护(新增) -5. Git 工具的安全模型(白名单+拦截正则, 后续提取) -6. mtime 校验 -7. 审计审批层 - -提供统一的 "工具 X 能否对路径 Y 执行操作 Z" 查询接口. -""" - -from __future__ import annotations - -from enum import Enum, auto -from pathlib import Path - - -class Operation(Enum): - """权限操作类型.""" - - READ = auto() - WRITE = auto() - SEARCH = auto() - EXECUTE = auto() - DELETE = auto() - - -class Decision(Enum): - """权限决策结果.""" - - ALLOWED = "allowed" - DENIED = "denied" - - -class PermissionManager: - """统一权限决策引擎. - - 使用方式(从 Workspace 获取): - perm = workspace.permission_manager - if perm.is_allowed("read_tool", path, Operation.READ): - ... - - Args: - workspace_root: 工作区根目录 - """ - - def __init__(self, workspace_root: Path): - self._root = workspace_root - - # 敏感文件正则列表(与 ExclusionManager 保持一致) - self._sensitive_patterns: list[str] = [ - r"\.env$", - r"\.env\..+$", - r".*\.pem$", - r"credentials\..*$", - r".*\.key$", - r".*\.cert$", - r"id_rsa$", - r"id_ed25519$", - r".*\.cred$", - r".*\.secret$", - ] - - # 操作 → 所需权限级别映射 - self._operation_permissions: dict[Operation, str] = { - Operation.READ: "read", - Operation.WRITE: "write", - Operation.SEARCH: "read", - Operation.EXECUTE: "write", - Operation.DELETE: "write", - } - - def _is_sensitive_path(self, path: Path) -> bool: - """检查路径是否匹配敏感文件模式.""" - import re - - try: - rel_str = str(path.relative_to(self._root)).replace("\\", "/") - except ValueError: - return True # 工作区外的路径视为敏感 - - return any(re.search(pattern, rel_str) for pattern in self._sensitive_patterns) - - def check(self, tool_name: str, path: Path, operation: Operation) -> Decision: - """检查工具能否对路径执行操作. - - 决策流程: - 1. 如果路径在工作区外 → DENIED - 2. 如果是敏感文件且操作非 SEARCH → DENIED - 3. 如果是二进制文件且操作是 READ/WRITE → 特殊处理(记录而非禁止) - 4. 否则 → ALLOWED - - Args: - tool_name: 工具名称(如 "read_tool", "write_tool") - path: 目标路径 - operation: 操作类型 - - Returns: - 权限决策结果 - """ - # 1. 工作区边界(双重保障, PathValidator 已做) - try: - path.relative_to(self._root) - except ValueError: - return Decision.DENIED - - # 2. 敏感文件保护(禁止 READ/WRITE/EXECUTE/DELETE) - if operation in ( - Operation.READ, - Operation.WRITE, - Operation.EXECUTE, - Operation.DELETE, - ) and self._is_sensitive_path(path): - return Decision.DENIED - - # 3. 二进制文件: 允许但标记 (记录由调用方处理) - # 这里不做禁止, 仅在 query 中返回信息 - - return Decision.ALLOWED - - def is_allowed(self, tool_name: str, path: Path, operation: Operation) -> bool: - """快捷方法: 是否允许操作.""" - return self.check(tool_name, path, operation) == Decision.ALLOWED diff --git a/src/workspace/workspace.py b/src/workspace/workspace.py index fa9e960..295b2d6 100644 --- a/src/workspace/workspace.py +++ b/src/workspace/workspace.py @@ -7,7 +7,6 @@ from src.models.tool_error_response import ToolErrorResponse from src.workspace.exclusion_manager import ExclusionManager from src.workspace.path_validator import PathNotFoundError, PathValidator, WorkspaceBoundaryError -from src.workspace.permissions import PermissionManager def _highlight_matches(line: str, regex: re.Pattern) -> str: @@ -47,7 +46,6 @@ def __init__(self, path: str): self.root_path = Path(path).resolve() self.path_validator: PathValidator = PathValidator(self.root_path) self.exclusion_manager: ExclusionManager = ExclusionManager(self.root_path) - self.permission_manager: PermissionManager = PermissionManager(self.root_path) self.is_git_repo: bool = (self.root_path / ".git").is_dir() self.platform: str = sys.platform self.date: str = date.today().strftime("%y-%m-%d") @@ -232,8 +230,6 @@ def search_content_multi_pattern( else: for file_path in path.rglob(file_pattern): if file_path.is_file(): - if any(self.exclusion_manager.should_exclude_dir(p.name) for p in file_path.parents): - continue rel = str(file_path.relative_to(self.root_path)) if any(ir.search(rel) for ir in ignore_res): continue From 93bd44cda6d06b1303f6b65da2c3408815839f01 Mon Sep 17 00:00:00 2001 From: Suntion <149924916+SunYanbox@users.noreply.github.com> Date: Thu, 7 May 2026 17:39:04 +0800 Subject: [PATCH 3/5] =?UTF-8?q?feat(utils):=20=E6=89=A9=E5=B1=95=E4=BA=8C?= =?UTF-8?q?=E8=BF=9B=E5=88=B6=E6=96=87=E4=BB=B6=E6=A3=80=E6=B5=8B=E5=99=A8?= =?UTF-8?q?=E4=BB=A5=E6=94=AF=E6=8C=81Godot=E9=A1=B9=E7=9B=AE=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=20-=20=E6=96=B0=E5=A2=9E=E5=8A=9F=E8=83=BD:=20?= =?UTF-8?q?=E5=9C=A8=E6=96=87=E4=BB=B6=E6=89=A9=E5=B1=95=E5=90=8D=E7=99=BD?= =?UTF-8?q?=E5=90=8D=E5=8D=95=E4=B8=AD=E6=B7=BB=E5=8A=A0Godot=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E5=90=8E=E7=BC=80=20=20=20*=20=E6=B7=BB=E5=8A=A0=20`.?= =?UTF-8?q?godot`=20=E5=92=8C=20`.gd`=20=E6=94=AF=E6=8C=81=20=20=20*=20?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20`.gd.uid`=20=E5=92=8C=20`.tscn`=20?= =?UTF-8?q?=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/binary_detector.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/utils/binary_detector.py b/src/utils/binary_detector.py index cdbf2a8..420278a 100644 --- a/src/utils/binary_detector.py +++ b/src/utils/binary_detector.py @@ -83,6 +83,11 @@ ".vbs", # VBScript ".reg", # Windows Registry ".desktop", + # Godot + ".godot", + ".gd", + ".gd.uid", + ".tscn", } ) From 0a77885c79c0f4e7c9694731e5ef9a6a65b33d17 Mon Sep 17 00:00:00 2001 From: Suntion <149924916+SunYanbox@users.noreply.github.com> Date: Thu, 7 May 2026 17:46:45 +0800 Subject: [PATCH 4/5] =?UTF-8?q?feat(utils):=20=E6=89=A9=E5=B1=95=E4=BA=8C?= =?UTF-8?q?=E8=BF=9B=E5=88=B6=E6=96=87=E4=BB=B6=E6=A3=80=E6=B5=8B=E5=90=8E?= =?UTF-8?q?=E7=BC=80=E5=88=97=E8=A1=A8=20-=20=E6=96=B0=E5=A2=9E=E5=8A=9F?= =?UTF-8?q?=E8=83=BD:=20=E5=9C=A8=20binary=5Fdetector.py=20=E7=9A=84=20FIL?= =?UTF-8?q?E=5FEXTENSIONS=20=E9=9B=86=E5=90=88=E4=B8=AD=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=AF=B9=E7=89=B9=E5=AE=9A=E7=BC=96=E8=AF=91=E4=BA=A7=E7=89=A9?= =?UTF-8?q?=E5=92=8C=E6=95=B0=E6=8D=AE=E5=BA=93=E6=96=87=E4=BB=B6=E7=9A=84?= =?UTF-8?q?=E8=AF=86=E5=88=AB=20=20=20*=20=E6=B7=BB=E5=8A=A0=20.pdb=20(?= =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E6=95=B0=E6=8D=AE=E5=BA=93)=20=E5=90=8E?= =?UTF-8?q?=E7=BC=80=20=20=20*=20=E6=B7=BB=E5=8A=A0=20.pyd=20(Python=20?= =?UTF-8?q?=E5=8A=A8=E6=80=81=E9=93=BE=E6=8E=A5=E5=BA=93)=20=E5=90=8E?= =?UTF-8?q?=E7=BC=80=20=20=20*=20=E6=B7=BB=E5=8A=A0=20.o=20(=E7=9B=AE?= =?UTF-8?q?=E6=A0=87=E6=96=87=E4=BB=B6)=20=E5=90=8E=E7=BC=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/binary_detector.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/utils/binary_detector.py b/src/utils/binary_detector.py index 420278a..39af0f7 100644 --- a/src/utils/binary_detector.py +++ b/src/utils/binary_detector.py @@ -188,6 +188,9 @@ ".db", ".sqlite", ".sqlite3", + ".pdb", + ".pyd", + ".o", } ) From 83563eeb178a3016fb952bf4aab26924c2e6e3d4 Mon Sep 17 00:00:00 2001 From: Suntion <149924916+SunYanbox@users.noreply.github.com> Date: Thu, 7 May 2026 17:47:48 +0800 Subject: [PATCH 5/5] =?UTF-8?q?feat(workspace/tools):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=90=9C=E7=B4=A2=E9=80=BB=E8=BE=91=E5=B9=B6?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=8C=E8=BF=9B=E5=88=B6=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=20(#154)=20-=20=E6=96=B0=E5=A2=9E=E5=8A=9F?= =?UTF-8?q?=E8=83=BD:=20=E9=9B=86=E6=88=90=E4=BA=8C=E8=BF=9B=E5=88=B6?= =?UTF-8?q?=E6=96=87=E4=BB=B6=E6=A3=80=E6=B5=8B=E6=9C=BA=E5=88=B6=20=20=20?= =?UTF-8?q?*=20=E5=9C=A8=20`regex=5Fsearch=5Ftool.py`=20=E5=92=8C=20`exact?= =?UTF-8?q?=5Fsearch=5Ftool.py`=20=E4=B8=AD=E5=AF=BC=E5=85=A5=20`src.utils?= =?UTF-8?q?.binary=5Fdetector.is=5Fbinary=5Ffile`=20=20=20*=20=E5=9C=A8?= =?UTF-8?q?=E9=81=8D=E5=8E=86=E6=96=87=E4=BB=B6=E5=88=97=E8=A1=A8=E6=97=B6?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=20`is=5Fbinary=5Ffile(file=5Fpath)`=20?= =?UTF-8?q?=E5=88=A4=E6=96=AD=EF=BC=8C=E8=87=AA=E5=8A=A8=E8=B7=B3=E8=BF=87?= =?UTF-8?q?=E4=BA=8C=E8=BF=9B=E5=88=B6=E6=96=87=E4=BB=B6=20-=20=E9=87=8D?= =?UTF-8?q?=E6=9E=84=E4=BC=98=E5=8C=96:=20=E5=A2=9E=E5=BC=BA=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E8=B7=AF=E5=BE=84=E7=AD=9B=E9=80=89=E4=B8=8E=E6=8E=92?= =?UTF-8?q?=E9=99=A4=E9=80=BB=E8=BE=91=20=20=20*=20=E4=BF=AE=E6=94=B9=20`f?= =?UTF-8?q?iles=5Fto=5Fsearch`=20=E7=94=9F=E6=88=90=E9=80=BB=E8=BE=91?= =?UTF-8?q?=EF=BC=8C=E5=B0=86=E7=AE=80=E5=8D=95=E7=9A=84=20`rglob`=20?= =?UTF-8?q?=E8=BD=AC=E6=8D=A2=E4=B8=BA=E5=8C=85=E5=90=AB=E6=9D=A1=E4=BB=B6?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=E7=9A=84=E5=88=97=E8=A1=A8=E6=8E=A8=E5=AF=BC?= =?UTF-8?q?=E5=BC=8F=20=20=20*=20=E5=BC=95=E5=85=A5=20`self.=5Fexclusion?= =?UTF-8?q?=5Fmanager.should=5Fexclude=5Fpath(p)`=20=E6=96=B9=E6=B3=95?= =?UTF-8?q?=EF=BC=8C=E6=94=AF=E6=8C=81=E8=87=AA=E5=AE=9A=E4=B9=89=E8=B7=AF?= =?UTF-8?q?=E5=BE=84=E6=8E=92=E9=99=A4=E8=A7=84=E5=88=99=20=20=20*=20?= =?UTF-8?q?=E7=A1=AE=E4=BF=9D=E4=BB=85=E5=A4=84=E7=90=86=20`p.is=5Ffile()`?= =?UTF-8?q?=20=E4=B8=94=E6=9C=AA=E8=A2=AB=E6=8E=92=E9=99=A4=E7=9A=84?= =?UTF-8?q?=E6=9C=89=E6=95=88=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/workspace/tools/exact_search_tool.py | 15 ++++++++++++++- src/workspace/tools/regex_search_tool.py | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/workspace/tools/exact_search_tool.py b/src/workspace/tools/exact_search_tool.py index 3e11fd1..8456b04 100644 --- a/src/workspace/tools/exact_search_tool.py +++ b/src/workspace/tools/exact_search_tool.py @@ -2,6 +2,7 @@ from pathlib import Path from src.models.tools.tool_result import ToolResult +from src.utils.binary_detector import is_binary_file from src.workspace.tools.base_tool import BaseTool from src.workspace.workspace import Workspace @@ -117,12 +118,24 @@ def exact_search( warnings = [""] # 确定要搜索的文件列表(支持单文件或目录) - files_to_search = [search_path] if search_path.is_file() else list(search_path.rglob(file_pattern)) + files_to_search = ( + [search_path] + if search_path.is_file() + else [ + p + for p in search_path.rglob(file_pattern) + if p.is_file() and not self._exclusion_manager.should_exclude_path(p) + ] + ) # 遍历所有文件 for file_path in files_to_search: if not file_path.is_file(): continue + + if is_binary_file(file_path): + continue + # 检查是否达到限制 if total_matches >= limit: break diff --git a/src/workspace/tools/regex_search_tool.py b/src/workspace/tools/regex_search_tool.py index b96cd57..7ec6fad 100644 --- a/src/workspace/tools/regex_search_tool.py +++ b/src/workspace/tools/regex_search_tool.py @@ -2,6 +2,7 @@ from pathlib import Path from src.models.tools.tool_result import ToolResult +from src.utils.binary_detector import is_binary_file from src.workspace.tools.base_tool import BaseTool from src.workspace.workspace import Workspace @@ -144,12 +145,24 @@ def regex_search( warnings = [""] # 确定要搜索的文件列表(支持单文件或目录) - files_to_search = [search_path] if search_path.is_file() else list(search_path.rglob(file_pattern)) + files_to_search = ( + [search_path] + if search_path.is_file() + else [ + p + for p in search_path.rglob(file_pattern) + if p.is_file() and not self._exclusion_manager.should_exclude_path(p) + ] + ) # 遍历文件 for file_path in files_to_search: if not file_path.is_file(): continue + + if is_binary_file(file_path): + continue + # 检查是否达到限制 if total_matches >= limit: break