From 61c7f122afea318584e69e55d2324ba9fe21afe3 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Thu, 26 Mar 2026 12:33:03 +0100 Subject: [PATCH 1/4] add venv support for custom evals --- examples/custom_evaluators/eval_config.yaml | 10 ++ src/agentevals/custom_evaluators.py | 56 +++++++--- src/agentevals/evaluator/sources.py | 24 +++- src/agentevals/evaluator/venv.py | 117 ++++++++++++++++++++ 4 files changed, 191 insertions(+), 16 deletions(-) create mode 100644 src/agentevals/evaluator/venv.py diff --git a/examples/custom_evaluators/eval_config.yaml b/examples/custom_evaluators/eval_config.yaml index 59171b6..a7c1f92 100644 --- a/examples/custom_evaluators/eval_config.yaml +++ b/examples/custom_evaluators/eval_config.yaml @@ -33,3 +33,13 @@ evaluators: threshold: 0.110 executor: local + # TODO switch to GitHub once PR is approved. + - name: bertscore + type: code + path: ../evaluators/evaluators/bertscore/bertscore.py + threshold: 0.7 + timeout: 300 + config: + expected: "There are two Helm releases installed in the cluster: kagent in namespace kagent (revision 2, deployed, chart kagent-0.7.14) and kagent-crds in namespace kagent (revision 1, deployed, chart kagent-crds-0.7.14)." + metric: "f1" + diff --git a/src/agentevals/custom_evaluators.py b/src/agentevals/custom_evaluators.py index 3889476..2a9dff2 100644 --- a/src/agentevals/custom_evaluators.py +++ b/src/agentevals/custom_evaluators.py @@ -68,8 +68,12 @@ def extensions(self) -> tuple[str, ...]: """File extensions this runtime handles (e.g. ``(".py",)``).""" @abc.abstractmethod - def build_command(self, path: Path) -> list[str]: - """Return the argv list to execute *path*.""" + def build_command(self, path: Path, python: Path | None = None) -> list[str]: + """Return the argv list to execute *path*. + + For Python runtimes, *python* may point to a venv interpreter. + Non-Python runtimes ignore this parameter. + """ def is_available(self) -> bool: """Return True if the runtime's interpreter is found on the system.""" @@ -89,8 +93,9 @@ def name(self) -> str: def extensions(self) -> tuple[str, ...]: return (".py",) - def build_command(self, path: Path) -> list[str]: - return [sys.executable, str(path)] + def build_command(self, path: Path, python: Path | None = None) -> list[str]: + exe = str(python) if python else sys.executable + return [exe, str(path)] def is_available(self) -> bool: return True @@ -105,7 +110,7 @@ def name(self) -> str: def extensions(self) -> tuple[str, ...]: return (".js", ".ts") - def build_command(self, path: Path) -> list[str]: + def build_command(self, path: Path, python: Path | None = None) -> list[str]: node = shutil.which("node") if not node: raise RuntimeError("Node.js not found on PATH (required for .js/.ts evaluators)") @@ -203,19 +208,22 @@ class SubprocessBackend(EvaluatorBackend): """Runs a local code file (.py, .js, .ts, …) as a subprocess. The correct interpreter is resolved from the file extension via the - :data:`_RUNTIMES` registry. + :data:`_RUNTIMES` registry. When *venv_python* is provided, Python + evaluators run inside that virtual environment instead of the host + interpreter. """ - def __init__(self, path: Path, timeout: int = 30): + def __init__(self, path: Path, timeout: int = 30, venv_python: Path | None = None): self._path = path.resolve() self._runtime = _resolve_runtime(self._path) self._timeout = timeout + self._venv_python = venv_python if not self._path.exists(): raise FileNotFoundError(f"Evaluator file not found: {self._path}") async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult: - cmd = self._runtime.build_command(self._path) + cmd = self._runtime.build_command(self._path, self._venv_python) return await _run_subprocess(cmd, eval_input.model_dump_json(), self._timeout, metric_name) @@ -223,20 +231,22 @@ async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult: # Executor factory # --------------------------------------------------------------------------- -_EXECUTOR_FACTORIES: dict[str, Callable[[Path, int], EvaluatorBackend]] = { - "local": lambda path, timeout: SubprocessBackend(path, timeout), +_EXECUTOR_FACTORIES: dict[str, Callable[..., EvaluatorBackend]] = { + "local": lambda path, timeout, venv_python=None: SubprocessBackend(path, timeout, venv_python), } -def create_executor(executor_name: str, path: Path, timeout: int = 30) -> EvaluatorBackend: +def create_executor( + executor_name: str, path: Path, timeout: int = 30, venv_python: Path | None = None +) -> EvaluatorBackend: """Construct an EvaluatorBackend by executor name (e.g. 'local', 'docker').""" factory = _EXECUTOR_FACTORIES.get(executor_name) if factory is None: raise ValueError(f"Unknown executor '{executor_name}'. Available: {sorted(_EXECUTOR_FACTORIES.keys())}") - return factory(path, timeout) + return factory(path, timeout, venv_python) -def register_executor(name: str, factory: Callable[[Path, int], EvaluatorBackend]) -> None: +def register_executor(name: str, factory: Callable[..., EvaluatorBackend]) -> None: """Register a new executor factory (e.g. for Docker support).""" _EXECUTOR_FACTORIES[name] = factory @@ -425,7 +435,25 @@ async def evaluate_custom_evaluator( evaluator_def = await get_default_resolver().resolve(evaluator_def) if isinstance(evaluator_def, CodeEvaluatorDef): - backend = create_executor(evaluator_def.executor, Path(evaluator_def.path), evaluator_def.timeout) + evaluator_path = Path(evaluator_def.path) + + # Set up a venv if the evaluator ships a requirements.txt. + venv_python: Path | None = None + if evaluator_path.suffix == ".py": + from .evaluator.venv import ensure_venv_async + + try: + venv_python = await ensure_venv_async(evaluator_path) + except Exception as exc: + logger.error("Failed to set up venv for '%s': %s", evaluator_def.name, exc) + return MetricResult( + metric_name=evaluator_def.name, + error=f"Dependency installation failed: {exc}", + ) + + backend = create_executor( + evaluator_def.executor, evaluator_path, evaluator_def.timeout, venv_python=venv_python + ) else: raise ValueError(f"Unsupported custom evaluator type: {type(evaluator_def).__name__}") diff --git a/src/agentevals/evaluator/sources.py b/src/agentevals/evaluator/sources.py index 9ef5d1f..5122c29 100644 --- a/src/agentevals/evaluator/sources.py +++ b/src/agentevals/evaluator/sources.py @@ -216,8 +216,22 @@ async def fetch_evaluator(self, ref: str, dest: Path) -> Path: resp = await client.get(url, headers=self._headers(), timeout=30) resp.raise_for_status() - dest.parent.mkdir(parents=True, exist_ok=True) - dest.write_text(resp.text, encoding="utf-8") # noqa: ASYNC240 + dest.parent.mkdir(parents=True, exist_ok=True) + dest.write_text(resp.text, encoding="utf-8") # noqa: ASYNC240 + + # Also try to fetch requirements.txt from the same directory. + ref_dir = str(Path(ref).parent) + req_ref = f"{ref_dir}/requirements.txt" + req_url = self._raw_url(req_ref) + try: + req_resp = await client.get(req_url, headers=self._headers(), timeout=15) + if req_resp.status_code == 200: + req_dest = dest.parent / "requirements.txt" + req_dest.write_text(req_resp.text, encoding="utf-8") # noqa: ASYNC240 + logger.info("Downloaded requirements.txt for evaluator") + except httpx.HTTPError: + logger.debug("No requirements.txt found for evaluator (or download failed)") + return dest @@ -267,6 +281,12 @@ async def fetch_evaluator(self, ref: str, dest: Path) -> Path: dest.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src, dest) + + # Also copy requirements.txt if it exists alongside the source file. + req_src = src.parent / "requirements.txt" + if req_src.exists(): + shutil.copy2(req_src, dest.parent / "requirements.txt") + return dest diff --git a/src/agentevals/evaluator/venv.py b/src/agentevals/evaluator/venv.py new file mode 100644 index 0000000..43aca99 --- /dev/null +++ b/src/agentevals/evaluator/venv.py @@ -0,0 +1,117 @@ +"""Virtual environment management for evaluators with dependencies. + +When an evaluator ships a ``requirements.txt`` alongside its entrypoint, we +create a cached venv, install the dependencies (plus the evaluator SDK), and +return the path to that venv's Python interpreter so the evaluator subprocess +runs in isolation. +""" + +from __future__ import annotations + +import asyncio +import hashlib +import logging +import shutil +import subprocess +import sys +from pathlib import Path + +logger = logging.getLogger(__name__) + +_VENV_CACHE_DIR = Path.home() / ".cache" / "agentevals" / "venvs" +_HASH_FILE = ".requirements_hash" + +# Per-evaluator locks to prevent concurrent venv creation for the same evaluator. +_venv_locks: dict[str, asyncio.Lock] = {} + + +def _venv_python(venv_dir: Path) -> Path: + if sys.platform == "win32": + return venv_dir / "Scripts" / "python.exe" + return venv_dir / "bin" / "python" + + +def _venv_key(evaluator_path: Path) -> str: + """Stable cache directory name derived from evaluator location.""" + resolved = evaluator_path.resolve() + name = resolved.parent.name + path_hash = hashlib.sha256(str(resolved.parent).encode()).hexdigest()[:8] + return f"{name}-{path_hash}" + + +def _is_venv_valid(venv_dir: Path, req_hash: str) -> bool: + hash_file = venv_dir / _HASH_FILE + return _venv_python(venv_dir).exists() and hash_file.exists() and hash_file.read_text().strip() == req_hash + + +def _create_venv(venv_dir: Path, uv: str | None) -> None: + if venv_dir.exists(): + shutil.rmtree(venv_dir) + cmd = ( + [uv, "venv", str(venv_dir), "--python", sys.executable] if uv else [sys.executable, "-m", "venv", str(venv_dir)] + ) + subprocess.run(cmd, check=True, capture_output=True) + + +def _install_deps(venv_dir: Path, requirements: Path, uv: str | None) -> None: + python = str(_venv_python(venv_dir)) + sdk_spec = "agentevals-evaluator-sdk" + + if uv: + base = [uv, "pip", "install", "--python", python] + else: + base = [python, "-m", "pip", "install"] + + subprocess.run(base + [sdk_spec], check=True, capture_output=True) + logger.info("Installing dependencies from %s ...", requirements.name) + subprocess.run(base + ["-r", str(requirements)], check=True) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def ensure_venv(evaluator_path: Path) -> Path | None: + """Ensure a cached venv exists for *evaluator_path* if it has ``requirements.txt``. + + Returns the venv Python path, or ``None`` if no venv is needed. + """ + requirements = evaluator_path.resolve().parent / "requirements.txt" + if not requirements.exists(): + return None + + req_hash = hashlib.sha256(requirements.read_bytes()).hexdigest() + venv_dir = _VENV_CACHE_DIR / _venv_key(evaluator_path) + + if _is_venv_valid(venv_dir, req_hash): + logger.debug("Using cached venv for %s at %s", evaluator_path.name, venv_dir) + return _venv_python(venv_dir) + + uv = shutil.which("uv") + logger.info( + "Setting up environment for evaluator '%s' (using %s). This may take a while on first run...", + evaluator_path.stem, + "uv" if uv else "venv+pip", + ) + + try: + _create_venv(venv_dir, uv) + _install_deps(venv_dir, requirements, uv) + except subprocess.CalledProcessError as exc: + stderr = exc.stderr.decode() if isinstance(exc.stderr, bytes) else (exc.stderr or "") + raise RuntimeError(f"Failed to set up environment for evaluator '{evaluator_path.stem}': {stderr}") from exc + + (venv_dir / _HASH_FILE).write_text(req_hash) + logger.info("Environment ready for '%s'", evaluator_path.stem) + return _venv_python(venv_dir) + + +async def ensure_venv_async(evaluator_path: Path) -> Path | None: + """Async wrapper around :func:`ensure_venv` with per-evaluator locking.""" + venv_key = _venv_key(evaluator_path) + if venv_key not in _venv_locks: + _venv_locks[venv_key] = asyncio.Lock() + + async with _venv_locks[venv_key]: + return await asyncio.to_thread(ensure_venv, evaluator_path) From 72eb775d16924c51b864e4d980997383433cd6d4 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Thu, 26 Mar 2026 16:36:38 +0100 Subject: [PATCH 2/4] address review comments --- examples/custom_evaluators/eval_config.yaml | 11 ----------- src/agentevals/evaluator/sources.py | 4 ++-- src/agentevals/evaluator/venv.py | 1 + 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/examples/custom_evaluators/eval_config.yaml b/examples/custom_evaluators/eval_config.yaml index a7c1f92..d3bd261 100644 --- a/examples/custom_evaluators/eval_config.yaml +++ b/examples/custom_evaluators/eval_config.yaml @@ -32,14 +32,3 @@ evaluators: ref: evaluators/random_evaluator/random_evaluator.py threshold: 0.110 executor: local - - # TODO switch to GitHub once PR is approved. - - name: bertscore - type: code - path: ../evaluators/evaluators/bertscore/bertscore.py - threshold: 0.7 - timeout: 300 - config: - expected: "There are two Helm releases installed in the cluster: kagent in namespace kagent (revision 2, deployed, chart kagent-0.7.14) and kagent-crds in namespace kagent (revision 1, deployed, chart kagent-crds-0.7.14)." - metric: "f1" - diff --git a/src/agentevals/evaluator/sources.py b/src/agentevals/evaluator/sources.py index 5122c29..8fe022c 100644 --- a/src/agentevals/evaluator/sources.py +++ b/src/agentevals/evaluator/sources.py @@ -8,7 +8,7 @@ import os import time from dataclasses import asdict, dataclass, field -from pathlib import Path +from pathlib import Path, PurePosixPath import yaml @@ -220,7 +220,7 @@ async def fetch_evaluator(self, ref: str, dest: Path) -> Path: dest.write_text(resp.text, encoding="utf-8") # noqa: ASYNC240 # Also try to fetch requirements.txt from the same directory. - ref_dir = str(Path(ref).parent) + ref_dir = str(PurePosixPath(ref).parent) req_ref = f"{ref_dir}/requirements.txt" req_url = self._raw_url(req_ref) try: diff --git a/src/agentevals/evaluator/venv.py b/src/agentevals/evaluator/venv.py index 43aca99..82d7401 100644 --- a/src/agentevals/evaluator/venv.py +++ b/src/agentevals/evaluator/venv.py @@ -96,6 +96,7 @@ def ensure_venv(evaluator_path: Path) -> Path | None: ) try: + venv_dir.parent.mkdir(parents=True, exist_ok=True) _create_venv(venv_dir, uv) _install_deps(venv_dir, requirements, uv) except subprocess.CalledProcessError as exc: From ac54608345c1a8bbae2c500e295135e528525417 Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Fri, 27 Mar 2026 10:39:01 +0100 Subject: [PATCH 3/4] address review feedback --- src/agentevals/custom_evaluators.py | 44 +++++++++++++---------------- src/agentevals/evaluator/venv.py | 2 +- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/agentevals/custom_evaluators.py b/src/agentevals/custom_evaluators.py index 2a9dff2..43f91c7 100644 --- a/src/agentevals/custom_evaluators.py +++ b/src/agentevals/custom_evaluators.py @@ -68,12 +68,8 @@ def extensions(self) -> tuple[str, ...]: """File extensions this runtime handles (e.g. ``(".py",)``).""" @abc.abstractmethod - def build_command(self, path: Path, python: Path | None = None) -> list[str]: - """Return the argv list to execute *path*. - - For Python runtimes, *python* may point to a venv interpreter. - Non-Python runtimes ignore this parameter. - """ + def build_command(self, path: Path) -> list[str]: + """Return the argv list to execute *path*.""" def is_available(self) -> bool: """Return True if the runtime's interpreter is found on the system.""" @@ -85,6 +81,9 @@ def is_available(self) -> bool: class PythonRuntime(Runtime): + def __init__(self, python_path: Path | None = None): + self._python_path = python_path + @property def name(self) -> str: return "Python" @@ -93,8 +92,8 @@ def name(self) -> str: def extensions(self) -> tuple[str, ...]: return (".py",) - def build_command(self, path: Path, python: Path | None = None) -> list[str]: - exe = str(python) if python else sys.executable + def build_command(self, path: Path) -> list[str]: + exe = str(self._python_path) if self._python_path else sys.executable return [exe, str(path)] def is_available(self) -> bool: @@ -110,7 +109,7 @@ def name(self) -> str: def extensions(self) -> tuple[str, ...]: return (".js", ".ts") - def build_command(self, path: Path, python: Path | None = None) -> list[str]: + def build_command(self, path: Path) -> list[str]: node = shutil.which("node") if not node: raise RuntimeError("Node.js not found on PATH (required for .js/.ts evaluators)") @@ -208,22 +207,20 @@ class SubprocessBackend(EvaluatorBackend): """Runs a local code file (.py, .js, .ts, …) as a subprocess. The correct interpreter is resolved from the file extension via the - :data:`_RUNTIMES` registry. When *venv_python* is provided, Python - evaluators run inside that virtual environment instead of the host - interpreter. + :data:`_RUNTIMES` registry. Pass a pre-configured *runtime* to override + the default (e.g. a :class:`PythonRuntime` with a venv interpreter). """ - def __init__(self, path: Path, timeout: int = 30, venv_python: Path | None = None): + def __init__(self, path: Path, timeout: int = 30, runtime: Runtime | None = None): self._path = path.resolve() - self._runtime = _resolve_runtime(self._path) + self._runtime = runtime or _resolve_runtime(self._path) self._timeout = timeout - self._venv_python = venv_python if not self._path.exists(): raise FileNotFoundError(f"Evaluator file not found: {self._path}") async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult: - cmd = self._runtime.build_command(self._path, self._venv_python) + cmd = self._runtime.build_command(self._path) return await _run_subprocess(cmd, eval_input.model_dump_json(), self._timeout, metric_name) @@ -232,18 +229,18 @@ async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult: # --------------------------------------------------------------------------- _EXECUTOR_FACTORIES: dict[str, Callable[..., EvaluatorBackend]] = { - "local": lambda path, timeout, venv_python=None: SubprocessBackend(path, timeout, venv_python), + "local": lambda path, timeout, runtime=None: SubprocessBackend(path, timeout, runtime), } def create_executor( - executor_name: str, path: Path, timeout: int = 30, venv_python: Path | None = None + executor_name: str, path: Path, timeout: int = 30, runtime: Runtime | None = None ) -> EvaluatorBackend: """Construct an EvaluatorBackend by executor name (e.g. 'local', 'docker').""" factory = _EXECUTOR_FACTORIES.get(executor_name) if factory is None: raise ValueError(f"Unknown executor '{executor_name}'. Available: {sorted(_EXECUTOR_FACTORIES.keys())}") - return factory(path, timeout, venv_python) + return factory(path, timeout, runtime) def register_executor(name: str, factory: Callable[..., EvaluatorBackend]) -> None: @@ -437,8 +434,7 @@ async def evaluate_custom_evaluator( if isinstance(evaluator_def, CodeEvaluatorDef): evaluator_path = Path(evaluator_def.path) - # Set up a venv if the evaluator ships a requirements.txt. - venv_python: Path | None = None + runtime: Runtime | None = None if evaluator_path.suffix == ".py": from .evaluator.venv import ensure_venv_async @@ -450,10 +446,10 @@ async def evaluate_custom_evaluator( metric_name=evaluator_def.name, error=f"Dependency installation failed: {exc}", ) + if venv_python: + runtime = PythonRuntime(python_path=venv_python) - backend = create_executor( - evaluator_def.executor, evaluator_path, evaluator_def.timeout, venv_python=venv_python - ) + backend = create_executor(evaluator_def.executor, evaluator_path, evaluator_def.timeout, runtime=runtime) else: raise ValueError(f"Unsupported custom evaluator type: {type(evaluator_def).__name__}") diff --git a/src/agentevals/evaluator/venv.py b/src/agentevals/evaluator/venv.py index 82d7401..931489a 100644 --- a/src/agentevals/evaluator/venv.py +++ b/src/agentevals/evaluator/venv.py @@ -64,7 +64,7 @@ def _install_deps(venv_dir: Path, requirements: Path, uv: str | None) -> None: subprocess.run(base + [sdk_spec], check=True, capture_output=True) logger.info("Installing dependencies from %s ...", requirements.name) - subprocess.run(base + ["-r", str(requirements)], check=True) + subprocess.run(base + ["-r", str(requirements)], check=True, capture_output=True) # --------------------------------------------------------------------------- From 74ce5dfbb7d3552e620ff4b1e907b82a87b5e6da Mon Sep 17 00:00:00 2001 From: krisztianfekete Date: Fri, 27 Mar 2026 11:28:56 +0100 Subject: [PATCH 4/4] address follow-up feedback, adopt the approach at NodeRuntime too --- src/agentevals/custom_evaluators.py | 29 +++++++++++++++++------------ src/agentevals/evaluator/venv.py | 3 ++- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/agentevals/custom_evaluators.py b/src/agentevals/custom_evaluators.py index 43f91c7..785af73 100644 --- a/src/agentevals/custom_evaluators.py +++ b/src/agentevals/custom_evaluators.py @@ -82,7 +82,7 @@ def is_available(self) -> bool: class PythonRuntime(Runtime): def __init__(self, python_path: Path | None = None): - self._python_path = python_path + self._exe = str(python_path) if python_path else sys.executable @property def name(self) -> str: @@ -93,14 +93,16 @@ def extensions(self) -> tuple[str, ...]: return (".py",) def build_command(self, path: Path) -> list[str]: - exe = str(self._python_path) if self._python_path else sys.executable - return [exe, str(path)] + return [self._exe, str(path)] def is_available(self) -> bool: return True class NodeRuntime(Runtime): + def __init__(self) -> None: + self._exe = shutil.which("node") + @property def name(self) -> str: return "Node.js" @@ -110,10 +112,12 @@ def extensions(self) -> tuple[str, ...]: return (".js", ".ts") def build_command(self, path: Path) -> list[str]: - node = shutil.which("node") - if not node: + if not self._exe: raise RuntimeError("Node.js not found on PATH (required for .js/.ts evaluators)") - return [node, str(path)] + return [self._exe, str(path)] + + def is_available(self) -> bool: + return self._exe is not None _RUNTIMES: list[Runtime] = [ @@ -229,18 +233,16 @@ async def run(self, eval_input: EvalInput, metric_name: str) -> EvalResult: # --------------------------------------------------------------------------- _EXECUTOR_FACTORIES: dict[str, Callable[..., EvaluatorBackend]] = { - "local": lambda path, timeout, runtime=None: SubprocessBackend(path, timeout, runtime), + "local": lambda path, timeout: SubprocessBackend(path, timeout), } -def create_executor( - executor_name: str, path: Path, timeout: int = 30, runtime: Runtime | None = None -) -> EvaluatorBackend: +def create_executor(executor_name: str, path: Path, timeout: int = 30) -> EvaluatorBackend: """Construct an EvaluatorBackend by executor name (e.g. 'local', 'docker').""" factory = _EXECUTOR_FACTORIES.get(executor_name) if factory is None: raise ValueError(f"Unknown executor '{executor_name}'. Available: {sorted(_EXECUTOR_FACTORIES.keys())}") - return factory(path, timeout, runtime) + return factory(path, timeout) def register_executor(name: str, factory: Callable[..., EvaluatorBackend]) -> None: @@ -449,7 +451,10 @@ async def evaluate_custom_evaluator( if venv_python: runtime = PythonRuntime(python_path=venv_python) - backend = create_executor(evaluator_def.executor, evaluator_path, evaluator_def.timeout, runtime=runtime) + if runtime is not None: + backend = SubprocessBackend(evaluator_path, evaluator_def.timeout, runtime=runtime) + else: + backend = create_executor(evaluator_def.executor, evaluator_path, evaluator_def.timeout) else: raise ValueError(f"Unsupported custom evaluator type: {type(evaluator_def).__name__}") diff --git a/src/agentevals/evaluator/venv.py b/src/agentevals/evaluator/venv.py index 931489a..fcbfc88 100644 --- a/src/agentevals/evaluator/venv.py +++ b/src/agentevals/evaluator/venv.py @@ -11,6 +11,7 @@ import asyncio import hashlib import logging +import os import shutil import subprocess import sys @@ -18,7 +19,7 @@ logger = logging.getLogger(__name__) -_VENV_CACHE_DIR = Path.home() / ".cache" / "agentevals" / "venvs" +_VENV_CACHE_DIR = Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache")) / "agentevals" / "venvs" _HASH_FILE = ".requirements_hash" # Per-evaluator locks to prevent concurrent venv creation for the same evaluator.