diff --git a/.env.example b/.env.example index 747f75424823..812986dca308 100644 --- a/.env.example +++ b/.env.example @@ -281,6 +281,13 @@ BROWSER_SESSION_TIMEOUT=300 # Browser sessions are automatically closed after this period of no activity BROWSER_INACTIVITY_TIMEOUT=120 +# Extra Chromium launch flags passed to agent-browser, comma- or newline-separated. +# Hermes auto-injects "--no-sandbox,--disable-dev-shm-usage" when it detects root +# or AppArmor-restricted unprivileged user namespaces (Ubuntu 23.10+, DGX Spark, +# many container images), so leave this unset unless you need extra flags. +# Setting this disables the auto-injection. +# AGENT_BROWSER_ARGS=--no-sandbox + # Camofox local anti-detection browser (Camoufox-based Firefox). # Set CAMOFOX_URL to route the browser tools through a local Camofox server # instead of agent-browser/Browserbase. See docs/user-guide/features/browser.md. @@ -387,24 +394,6 @@ IMAGE_TOOLS_DEBUG=false # CONTEXT_COMPRESSION_THRESHOLD=0.85 # Compress at 85% of context limit # Model is set via compression.summary_model in config.yaml (default: google/gemini-3-flash-preview) -# ============================================================================= -# RL TRAINING (Tinker + Atropos) -# ============================================================================= -# Run reinforcement learning training on language models using the Tinker API. -# Requires the rl-server to be running (from tinker-atropos package). 
- -# Tinker API Key - RL training service -# Get at: https://tinker-console.thinkingmachines.ai/keys -# TINKER_API_KEY= - -# Weights & Biases API Key - Experiment tracking and metrics -# Get at: https://wandb.ai/authorize -# WANDB_API_KEY= - -# RL API Server URL (default: http://localhost:8080) -# Change if running the rl-server on a different host/port -# RL_API_URL=http://localhost:8080 - # ============================================================================= # SKILLS HUB (GitHub integration for skill search/install/publish) # ============================================================================= diff --git a/.github/workflows/supply-chain-audit.yml b/.github/workflows/supply-chain-audit.yml index 417e7b21f843..69a9a115c87d 100644 --- a/.github/workflows/supply-chain-audit.yml +++ b/.github/workflows/supply-chain-audit.yml @@ -11,6 +11,7 @@ on: - '**/sitecustomize.py' - '**/usercustomize.py' - '**/__init__.pth' + - 'pyproject.toml' permissions: pull-requests: write @@ -137,3 +138,68 @@ jobs: run: | echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details." exit 1 + + dep-bounds: + name: Check PyPI dependency upper bounds + runs-on: ubuntu-latest + if: contains(github.event.pull_request.changed_files_url, 'pyproject.toml') || true + steps: + - name: Checkout + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 + + - name: Check for unbounded PyPI deps + id: bounds + run: | + set -euo pipefail + + BASE="${{ github.event.pull_request.base.sha }}" + HEAD="${{ github.event.pull_request.head.sha }}" + + # Only check added lines in pyproject.toml + ADDED=$(git diff "$BASE".."$HEAD" -- pyproject.toml | grep '^+' | grep -v '^+++' || true) + + if [ -z "$ADDED" ]; then + echo "found=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + # Match PyPI dep specs that have >= but no < ceiling. + # Pattern: "package>=version" without a following ",<" bound. 
+ # Excludes git+ URLs (which use commit SHAs) and comments. + UNBOUNDED=$(echo "$ADDED" | grep -oE '"[a-zA-Z0-9_-]+(\[[^\]]*\])?>=[ 0-9.]+"' | grep -v ',<' || true) + + if [ -n "$UNBOUNDED" ]; then + echo "found=true" >> "$GITHUB_OUTPUT" + echo "$UNBOUNDED" > /tmp/unbounded.txt + else + echo "found=false" >> "$GITHUB_OUTPUT" + fi + + - name: Post unbounded dep warning + if: steps.bounds.outputs.found == 'true' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + BODY="## โš ๏ธ Unbounded PyPI Dependency Detected + + This PR adds PyPI dependencies without a \`=floor,=1.2.0,<2\"\` + + --- + *See PR #2810 and CONTRIBUTING.md for the full policy rationale.*" + + gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs)" + + - name: Fail on unbounded deps + if: steps.bounds.outputs.found == 'true' + run: | + echo "::error::PyPI dependencies without upper bounds detected. Add /dev/null 2>&1; then + echo "Release $GITHUB_REF_NAME found" + exit 0 + fi + echo "Waiting for release... ($i/30)" + sleep 10 + done + echo "::warning::Release $GITHUB_REF_NAME not found after 5 minutes โ€” skipping signature upload" + echo "skip_sign=true" >> "$GITHUB_ENV" + + - name: Sign with Sigstore + if: env.skip_sign != 'true' + uses: sigstore/gh-action-sigstore-python@f514d46b907ebcd5bedc05145c03b69c1edd8b46 # v3.0.0 + with: + inputs: >- + ./dist/*.tar.gz + ./dist/*.whl + + - name: Attach signed artifacts to GitHub Release + if: env.skip_sign != 'true' + env: + GITHUB_TOKEN: ${{ github.token }} + # release.py already created the GitHub Release โ€” just upload + # the Sigstore signatures alongside the existing assets. 
+ run: >- + gh release upload + "$GITHUB_REF_NAME" dist/*.sigstore.json + --repo "$GITHUB_REPOSITORY" + --clobber diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 76580d6e8e50..000000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "tinker-atropos"] - path = tinker-atropos - url = https://github.com/nousresearch/tinker-atropos diff --git a/AGENTS.md b/AGENTS.md index da9f903eefb5..7c324f50332a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -56,7 +56,6 @@ hermes-agent/ ├── tui_gateway/ # Python JSON-RPC backend for the TUI ├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains integration) ├── cron/ # Scheduler — jobs.py, scheduler.py -├── environments/ # RL training environments (Atropos) ├── scripts/ # run_tests.sh, release.py, auxiliary scripts ├── website/ # Docusaurus docs site └── tests/ # Pytest suite (~17k tests across ~900 files as of May 2026) @@ -309,6 +308,29 @@ The registry handles schema collection, dispatch, availability checking, and err --- +## Dependency Pinning Policy + +All dependencies must have upper bounds to limit supply-chain attack surface. +This policy was established after the litellm compromise (PR #2796, #2810) and +reinforced after the Mini Shai-Hulud worm campaign (May 2026). + +| Source type | Treatment | Example | +|---|---|---| +| PyPI package | `>=floor,=0.28.1,<1"` | +| Git URL | Commit SHA | `git+https://...@<40-char-sha>` | +| GitHub Actions | Commit SHA + comment | `uses: actions/checkout@ # v4` | +| CI-only pip | `==exact` | `pyyaml==6.0.2` | + +**When adding a new dependency to `pyproject.toml`:** +1. Pin to `>=current_version,=1.5.0,<2`). +2. For pre-1.0 packages, use `<0.(current_minor + 2)` (e.g. `>=0.29,<0.32`). +3. Never commit a bare `>=X.Y.Z` without a ceiling — CI and reviewers will reject it. +4. Run `uv lock` to regenerate `uv.lock` with hashes. + +Reference: #2810 (bounds pass), #9801 (SHA pinning + audit CI). 
+ +--- + ## Adding Configuration ### config.yaml options: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4bbc3c67c70b..36b1e9df2d57 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -91,9 +91,6 @@ export VIRTUAL_ENV="$(pwd)/venv" # Install with all extras (messaging, cron, CLI menus, dev tools) uv pip install -e ".[all,dev]" -# Optional: RL training submodule -# git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos" - # Optional: browser tools npm install ``` @@ -196,7 +193,6 @@ hermes-agent/ โ”‚ โ”œโ”€โ”€ skills/ # Bundled skills (copied to ~/.hermes/skills/ on install) โ”œโ”€โ”€ optional-skills/ # Official optional skills (discoverable via hub, not activated by default) -โ”œโ”€โ”€ environments/ # RL training environments (Atropos integration) โ”œโ”€โ”€ tests/ # Test suite โ”œโ”€โ”€ website/ # Documentation site (hermes-agent.nousresearch.com) โ”‚ @@ -804,6 +800,47 @@ Hermes has terminal access. Security matters. If your PR affects security, note it explicitly in the description. +### Dependency pinning policy (supply chain hardening) + +After the [litellm supply chain compromise](https://github.com/BerriAI/litellm/issues/24512) in March 2026 and the [Mini Shai-Hulud worm campaign](https://socket.dev/blog/tanstack-npm-packages-compromised-mini-shai-hulud-supply-chain-attack) in May 2026, all dependencies must follow these rules: + +| Source type | Required treatment | Rationale | +|---|---|---| +| **PyPI package** | `>=floor, # vX.Y.Z` | +| **CI-only pip installs** | `==exact` | Hermetic CI builds; churn is acceptable. | + +**Every new PyPI dependency in a PR must have a `=X.Y.Z` specs will be rejected by reviewers. The `supply-chain-audit.yml` CI workflow also flags dependency manifest changes for manual review. + +**How to determine the ceiling:** +- If the package is at version `1.x.y`, use `<2`. +- If the package is at version `0.x.y` (pre-1.0), use `<0.(current_minor + 2)` โ€” e.g. if current is `0.29.x`, use `<0.32`. 
This gives ~2 minor versions of headroom while keeping the window small enough that a hostile takeover version is unlikely to land inside it. +- Exception: packages with very stable APIs (e.g. `aiohttp-socks`) can use `<1` at reviewer discretion. + +**Examples:** +```toml +# ✅ Correct — post-1.0 +"openai>=2.21.0,<3" +"pydantic>=2.12.5,<3" + +# ✅ Correct — pre-1.0 (tight minor window) +"asyncpg>=0.29,<0.32" +"aiosqlite>=0.20,<0.23" +"hindsight-client>=0.4.22,<0.5" + +# ❌ Rejected — no upper bound +"some-package>=1.2.3" + +# ❌ Rejected — too tight (blocks legitimate patches) +"some-package==1.2.3" + +# ❌ Rejected — too loose for pre-1.0 (allows 80 minor versions) +"some-package>=0.20,<1" +``` + +**Reference PRs:** #2796 (litellm removal), #2810 (upper bounds pass), #9801 (SHA pinning + supply-chain-audit CI). + --- ## Pull Request Process diff --git a/README.md b/README.md index 7e71632c3101..efe5515f4d8c 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open Scheduled automationsBuilt-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended. Delegates and parallelizesSpawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns. Runs anywhere, not just your laptopSeven terminal backends — local, Docker, SSH, Singularity, Modal, Daytona, and Vercel Sandbox. Daytona and Modal offer serverless persistence — your agent's environment hibernates when idle and wakes on demand, costing nearly nothing between sessions. Run it on a $5 VPS or a GPU cluster. -Research-readyBatch trajectory generation, Atropos RL environments, trajectory compression for training the next generation of tool-calling models. 
+Research-readyBatch trajectory generation, trajectory compression for training the next generation of tool-calling models. --- @@ -175,8 +175,6 @@ uv pip install -e ".[all,dev]" scripts/run_tests.sh ``` -> **RL Training (optional):** The RL/Atropos integration (`environments/`) โ€” see [`CONTRIBUTING.md`](https://github.com/NousResearch/hermes-agent/blob/main/CONTRIBUTING.md#development-setup) for the full setup. - --- ## Community diff --git a/README.zh-CN.md b/README.zh-CN.md index ea7fea8dcce0..9a964574413b 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -23,7 +23,7 @@ ๅฎšๆ—ถ่‡ชๅŠจๅŒ–ๅ†…็ฝฎ cron ่ฐƒๅบฆๅ™จ๏ผŒๆ”ฏๆŒๅ‘ไปปไฝ•ๅนณๅฐๆŠ•้€’ใ€‚ๆ—ฅๆŠฅใ€ๅคœ้—ดๅค‡ไปฝใ€ๅ‘จๅฎก่ฎกโ€”โ€”ๅ…จ้ƒจ็”จ่‡ช็„ถ่ฏญ่จ€ๆ่ฟฐ๏ผŒๆ— ไบบๅ€ผๅฎˆ่ฟ่กŒใ€‚ ๅง”ๆดพไธŽๅนถ่กŒ็”Ÿๆˆ้š”็ฆปๅญไปฃ็†ๅค„็†ๅนถ่กŒๅทฅไฝœๆตใ€‚็ผ–ๅ†™ Python ่„šๆœฌ้€š่ฟ‡ RPC ่ฐƒ็”จๅทฅๅ…ท๏ผŒๅฐ†ๅคšๆญฅ็ฎก้“ๅŽ‹็ผฉไธบ้›ถไธŠไธ‹ๆ–‡ๅผ€้”€็š„่ฝฎๆฌกใ€‚ ้šๅค„่ฟ่กŒๅ…ญ็ง็ปˆ็ซฏๅŽ็ซฏโ€”โ€”ๆœฌๅœฐใ€Dockerใ€SSHใ€Daytonaใ€Singularity ๅ’Œ Modalใ€‚Daytona ๅ’Œ Modal ๆไพ› Serverless ๆŒไน…ๅŒ–โ€”โ€”ไปฃ็†็Žฏๅขƒ็ฉบ้—ฒๆ—ถไผ‘็œ ใ€ๆŒ‰้œ€ๅ”ค้†’๏ผŒ็ฉบ้—ฒๆœŸ้—ดๅ‡ ไนŽ้›ถๆˆๆœฌใ€‚$5 VPS ๆˆ– GPU ้›†็พค้ƒฝ่ƒฝ่ท‘ใ€‚ -็ ”็ฉถๅฐฑ็ปชๆ‰น้‡่ฝจ่ฟน็”Ÿๆˆใ€Atropos RL ็Žฏๅขƒใ€่ฝจ่ฟนๅŽ‹็ผฉโ€”โ€”็”จไบŽ่ฎญ็ปƒไธ‹ไธ€ไปฃๅทฅๅ…ท่ฐƒ็”จๆจกๅž‹ใ€‚ +็ ”็ฉถๅฐฑ็ปชๆ‰น้‡่ฝจ่ฟน็”Ÿๆˆใ€่ฝจ่ฟนๅŽ‹็ผฉโ€”โ€”็”จไบŽ่ฎญ็ปƒไธ‹ไธ€ไปฃๅทฅๅ…ท่ฐƒ็”จๆจกๅž‹ใ€‚ --- @@ -161,12 +161,6 @@ uv pip install -e ".[all,dev]" python -m pytest tests/ -q ``` -> **RL ่ฎญ็ปƒ๏ผˆๅฏ้€‰๏ผ‰๏ผš** ๅฆ‚้œ€ๅ‚ไธŽ RL/Tinker-Atropos ้›†ๆˆๅผ€ๅ‘๏ผš -> ```bash -> git submodule update --init tinker-atropos -> uv pip install -e "./tinker-atropos" -> ``` - --- ## ็คพๅŒบ diff --git a/acp_adapter/auth.py b/acp_adapter/auth.py index a33b5a93938e..7b2556fd0625 100644 --- a/acp_adapter/auth.py +++ b/acp_adapter/auth.py @@ -1,8 +1,11 @@ -"""ACP auth helpers โ€” detect the currently configured Hermes provider.""" +"""ACP auth helpers โ€” detect and advertise Hermes authentication methods.""" from __future__ 
import annotations -from typing import Optional +from typing import Any, Optional + + +TERMINAL_SETUP_AUTH_METHOD_ID = "hermes-setup" def detect_provider() -> Optional[str]: @@ -22,3 +25,44 @@ def detect_provider() -> Optional[str]: def has_provider() -> bool: """Return True if Hermes can resolve any runtime provider credentials.""" return detect_provider() is not None + + +def build_auth_methods() -> list[Any]: + """Return registry-compatible ACP auth methods for Hermes. + + The official ACP registry validates that agents advertise at least one + usable auth method during the initial handshake. A fresh Zed install may + not have Hermes provider credentials configured yet, so Hermes always + advertises a terminal setup method. When credentials are already present, + it also advertises the resolved provider as the default agent-managed + runtime credential method. + """ + from acp.schema import AuthMethodAgent, TerminalAuthMethod + + methods: list[Any] = [] + provider = detect_provider() + if provider: + methods.append( + AuthMethodAgent( + id=provider, + name=f"{provider} runtime credentials", + description=( + "Authenticate Hermes using the currently configured " + f"{provider} runtime credentials." + ), + ) + ) + + methods.append( + TerminalAuthMethod( + id=TERMINAL_SETUP_AUTH_METHOD_ID, + name="Configure Hermes provider", + description=( + "Open Hermes' interactive model/provider setup in a terminal. " + "Use this when Hermes has not been configured on this machine yet." 
+ ), + type="terminal", + args=["--setup"], + ) + ) + return methods diff --git a/environments/benchmarks/__init__.py b/acp_adapter/bootstrap/__init__.py similarity index 100% rename from environments/benchmarks/__init__.py rename to acp_adapter/bootstrap/__init__.py diff --git a/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 b/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 new file mode 100644 index 000000000000..f840fd2d5592 --- /dev/null +++ b/acp_adapter/bootstrap/bootstrap_browser_tools.ps1 @@ -0,0 +1,288 @@ +# bootstrap_browser_tools.ps1 โ€” install agent-browser + Playwright Chromium +# into ~/.hermes/node/ for use by Hermes Agent's browser tools on Windows. +# +# Targets the registry-install path: users who got Hermes via +# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone, +# so the install.ps1 `npm install`-in-repo flow doesn't apply. This script +# is a self-contained, idempotent slice of install.ps1's browser block. +# +# Usage: +# .\bootstrap_browser_tools.ps1 # use defaults +# .\bootstrap_browser_tools.ps1 -Yes # accept Chromium download +# .\bootstrap_browser_tools.ps1 -SkipChromium # Node + agent-browser only +# +# Idempotent: re-running this is safe and fast. 
+ +[CmdletBinding()] +param( + [switch]$Yes, + [switch]$SkipChromium +) + +$ErrorActionPreference = "Stop" +$NodeVersion = "22" + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Logging +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +function Write-Info { param([string]$msg) Write-Host "[*] $msg" -ForegroundColor Cyan } +function Write-Success { param([string]$msg) Write-Host "[+] $msg" -ForegroundColor Green } +function Write-Warn { param([string]$msg) Write-Host "[!] $msg" -ForegroundColor Yellow } +function Write-Err { param([string]$msg) Write-Host "[x] $msg" -ForegroundColor Red } + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Paths +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +$HermesHome = $env:HERMES_HOME +if (-not $HermesHome) { + $HermesHome = Join-Path $env:USERPROFILE ".hermes" +} +$NodePrefix = Join-Path $HermesHome "node" + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Step 1: Node.js +# 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +function Resolve-NpmExe { + # Same gotcha as install.ps1: prefer npm.cmd over npm.ps1 so the + # PowerShell execution policy doesn't block us. + $cmd = Get-Command npm -ErrorAction SilentlyContinue + if (-not $cmd) { return $null } + $npmExe = $cmd.Source + if ($npmExe -like "*.ps1") { + $sibling = Join-Path (Split-Path $npmExe -Parent) "npm.cmd" + if (Test-Path $sibling) { return $sibling } + } + return $npmExe +} + +function Resolve-NpxExe { + $cmd = Get-Command npx -ErrorAction SilentlyContinue + if (-not $cmd) { return $null } + $npxExe = $cmd.Source + if ($npxExe -like "*.ps1") { + $sibling = Join-Path (Split-Path $npxExe -Parent) "npx.cmd" + if (Test-Path $sibling) { return $sibling } + } + return $npxExe +} + +function Ensure-Node { + # System Node on PATH? + $sysNode = Get-Command node -ErrorAction SilentlyContinue + if ($sysNode) { + try { + $v = & $sysNode.Source --version + $major = [int]($v -replace '^v(\d+).*', '$1') + if ($major -ge 20) { + Write-Success "Node.js $v found on PATH" + return + } + Write-Warn "Node.js $v is older than v20 โ€” installing managed Node." + } catch { + Write-Warn "Failed to query Node version: $_" + } + } + + # Hermes-managed Node? + $managedNode = Join-Path $NodePrefix "node.exe" + if (Test-Path $managedNode) { + $v = & $managedNode --version + Write-Success "Node.js $v found (Hermes-managed at $NodePrefix)" + # Prepend to current-process PATH so subsequent npm/npx calls find it. + $env:PATH = "$NodePrefix;$env:PATH" + return + } + + Write-Info "Installing Node.js $NodeVersion LTS into $NodePrefix ..." 
+ + $arch = if ([Environment]::Is64BitOperatingSystem) { "x64" } else { "x86" } + $indexUrl = "https://nodejs.org/dist/latest-v${NodeVersion}.x/" + + try { + $indexPage = Invoke-WebRequest -Uri $indexUrl -UseBasicParsing + $matches = [regex]::Matches($indexPage.Content, "node-v${NodeVersion}\.\d+\.\d+-win-${arch}\.zip") + if ($matches.Count -eq 0) { + Write-Err "Could not locate Node.js $NodeVersion zip for win-$arch" + throw "no tarball" + } + $zipName = $matches[0].Value + $zipUrl = "$indexUrl$zipName" + + $tmpDir = Join-Path $env:TEMP "hermes-node-$([guid]::NewGuid().ToString('N'))" + New-Item -ItemType Directory -Force -Path $tmpDir | Out-Null + $zipPath = Join-Path $tmpDir $zipName + + Write-Info "Downloading $zipName ..." + Invoke-WebRequest -Uri $zipUrl -OutFile $zipPath -UseBasicParsing + + Expand-Archive -Path $zipPath -DestinationPath $tmpDir -Force + $extracted = Get-ChildItem -Path $tmpDir -Directory | Where-Object { $_.Name -like "node-v*" } | Select-Object -First 1 + + if (-not $extracted) { Write-Err "Node.js extraction failed"; throw "extract" } + + if (Test-Path $NodePrefix) { Remove-Item -Recurse -Force $NodePrefix } + New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null + Move-Item -Path $extracted.FullName -Destination $NodePrefix + + Remove-Item -Recurse -Force $tmpDir -ErrorAction SilentlyContinue + + $env:PATH = "$NodePrefix;$env:PATH" + $v = & "$NodePrefix\node.exe" --version + Write-Success "Node.js $v installed to $NodePrefix" + } catch { + Write-Err "Node.js install failed: $_" + Write-Info "Install Node 20+ manually from https://nodejs.org/en/download/ and re-run." 
+ throw + } +} + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Step 2: agent-browser +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +function Ensure-AgentBrowser { + $npmExe = Resolve-NpmExe + if (-not $npmExe) { + Write-Err "npm not on PATH after Node install โ€” aborting" + throw "npm missing" + } + + # Already installed? + $existing = Get-Command agent-browser -ErrorAction SilentlyContinue + if ($existing) { + Write-Success "agent-browser already installed at $($existing.Source)" + return + } + + # When the user has system Node (winget / installer-based), `npm install + # -g` writes to a directory that may require admin rights. Force the + # prefix to the user-writable Hermes-managed Node directory so we never + # need elevation and the agent can always find the result. Mirrors the + # bash bootstrap's `--prefix $NODE_PREFIX` strategy. + New-Item -ItemType Directory -Force -Path $NodePrefix | Out-Null + + Write-Info "Installing agent-browser (npm, prefix=$NodePrefix)..." + & $npmExe install -g --prefix $NodePrefix --silent ` + "agent-browser@^0.26.0" "@askjo/camofox-browser@^1.5.2" + if ($LASTEXITCODE -ne 0) { + Write-Err "npm install -g agent-browser failed (exit $LASTEXITCODE)" + throw "npm install" + } + + # Windows npm global installs drop shims at $NodePrefix\ root (not bin/). + # Prepend to PATH so any subsequent npx call resolves them. 
+ $env:PATH = "$NodePrefix;$env:PATH" + + Write-Success "agent-browser installed to $NodePrefix" +} + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Step 3: Playwright Chromium +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +function Find-SystemBrowser { + $candidates = @( + "C:\Program Files\Google\Chrome\Application\chrome.exe", + "C:\Program Files (x86)\Google\Chrome\Application\chrome.exe", + "C:\Program Files\Chromium\Application\chromium.exe", + "${env:LOCALAPPDATA}\Google\Chrome\Application\chrome.exe", + "${env:LOCALAPPDATA}\Chromium\Application\chromium.exe" + ) + foreach ($p in $candidates) { + if (Test-Path $p) { return $p } + } + # Edge โ€” Chromium-based, agent-browser can use it + foreach ($p in @( + "C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe", + "C:\Program Files\Microsoft\Edge\Application\msedge.exe" + )) { + if (Test-Path $p) { return $p } + } + return $null +} + +function Write-BrowserEnv { + param([string]$BrowserPath) + $envFile = Join-Path $HermesHome ".env" + New-Item -ItemType Directory -Force -Path $HermesHome | Out-Null + if (Test-Path $envFile) { + $existing = Get-Content $envFile -Raw -ErrorAction SilentlyContinue + if ($existing -and ($existing -match "(?m)^AGENT_BROWSER_EXECUTABLE_PATH=")) { + return + } + } + Add-Content -Path $envFile -Value "" + Add-Content -Path $envFile -Value "# Hermes Agent browser tools โ€” use the system Chrome/Chromium/Edge binary." 
+ Add-Content -Path $envFile -Value "AGENT_BROWSER_EXECUTABLE_PATH=$BrowserPath" + Write-Success "Configured browser tools to use $BrowserPath" +} + +function Confirm-ChromiumDownload { + if ($Yes) { return $true } + if (-not [Environment]::UserInteractive) { + Write-Warn "Non-interactive shell โ€” skipping Chromium prompt." + Write-Info "Re-run with -Yes to install Chromium (~400 MB download)." + return $false + } + $reply = Read-Host "Install Playwright Chromium (~400 MB download)? [y/N]" + return ($reply -match "^(y|yes)$") +} + +function Ensure-Chromium { + if ($SkipChromium) { + Write-Info "Skipping Chromium install (-SkipChromium)" + return + } + + # agent-browser on Windows expects a Playwright-managed Chromium under + # %LOCALAPPDATA%\ms-playwright. The system-browser shortcut from the + # Linux/macOS path doesn't apply the same way on Windows โ€” Playwright's + # default launch path won't pick up a stock Chrome install without an + # explicit AGENT_BROWSER_EXECUTABLE_PATH. We still offer it as a + # fallback when the user doesn't want the download. + + if (-not (Confirm-ChromiumDownload)) { + $sys = Find-SystemBrowser + if ($sys) { + Write-Info "Using system browser at $sys (Chromium download skipped)." + Write-BrowserEnv -BrowserPath $sys + } else { + Write-Info "Chromium install skipped. Browser tools won't launch until" + Write-Info "Chromium is installed or AGENT_BROWSER_EXECUTABLE_PATH is set." + } + return + } + + $npxExe = Resolve-NpxExe + if (-not $npxExe) { + Write-Err "npx not on PATH โ€” cannot install Playwright Chromium" + throw "npx missing" + } + + Write-Info "Installing Playwright Chromium (~400 MB) ..." 
+ & $npxExe --yes playwright install chromium + if ($LASTEXITCODE -ne 0) { + Write-Err "Playwright Chromium install failed (exit $LASTEXITCODE)" + Write-Info "Try again later: npx --yes playwright install chromium" + throw "playwright" + } + Write-Success "Playwright Chromium installed" +} + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Main +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +Write-Info "Hermes Agent: bootstrapping browser tools" +Write-Info " HERMES_HOME = $HermesHome" +Write-Info " OS = Windows" + +Ensure-Node +Ensure-AgentBrowser +Ensure-Chromium + +Write-Success "Browser tools setup complete." +Write-Info "Hermes Agent will pick up agent-browser from $NodePrefix on next launch." diff --git a/acp_adapter/bootstrap/bootstrap_browser_tools.sh b/acp_adapter/bootstrap/bootstrap_browser_tools.sh new file mode 100755 index 000000000000..9981069a6af0 --- /dev/null +++ b/acp_adapter/bootstrap/bootstrap_browser_tools.sh @@ -0,0 +1,399 @@ +#!/usr/bin/env bash +# +# bootstrap_browser_tools.sh โ€” install agent-browser + Playwright Chromium +# into ~/.hermes/node/ for use by Hermes Agent's browser tools. +# +# Targets the registry-install path: users who got Hermes via +# `uvx --from 'hermes-agent[acp]==X' hermes-acp` don't have a repo clone, +# so the install.sh `npm install`-in-repo flow doesn't apply. This script +# is a self-contained, idempotent slice of install.sh's browser block โ€” +# safe to run from `hermes-acp --setup-browser`, from a fresh terminal, +# or from install.sh itself (it's a no-op when everything is already in place). 
+# +# Usage: +# bootstrap_browser_tools.sh # use defaults +# bootstrap_browser_tools.sh --yes # accept the ~400MB Chromium download +# bootstrap_browser_tools.sh --skip-chromium # only install Node + agent-browser +# HERMES_HOME=/custom/path bootstrap_browser_tools.sh +# +# Idempotent: re-running this is safe and fast. Each step checks whether +# the work is already done. + +set -euo pipefail + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Config +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +NODE_VERSION="22" +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" +NODE_PREFIX="$HERMES_HOME/node" + +SKIP_CHROMIUM=false +ASSUME_YES=false + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Logging +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +if [ -t 1 ]; then + C_GREEN='\033[0;32m' + C_YELLOW='\033[0;33m' + C_BLUE='\033[0;34m' + C_RED='\033[0;31m' + C_RESET='\033[0m' +else + C_GREEN='' ; C_YELLOW='' ; C_BLUE='' ; C_RED='' ; C_RESET='' +fi + +log_info() { printf "${C_BLUE}[*]${C_RESET} %s\n" "$*"; } +log_success() { printf "${C_GREEN}[โœ“]${C_RESET} %s\n" "$*"; } +log_warn() { printf "${C_YELLOW}[!]${C_RESET} %s\n" "$*" >&2; } +log_error() { printf "${C_RED}[โœ—]${C_RESET} %s\n" "$*" >&2; } + +# 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Arg parsing +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +while [ $# -gt 0 ]; do + case "$1" in + --skip-chromium) SKIP_CHROMIUM=true ;; + --yes|-y) ASSUME_YES=true ;; + -h|--help) + cat </dev/null 2>&1; then + local found_ver major + found_ver=$(node --version 2>/dev/null) + major=$(echo "$found_ver" | sed -E 's/^v([0-9]+).*/\1/') + if [ -n "$major" ] && [ "$major" -ge 20 ]; then + log_success "Node.js $found_ver found on PATH" + return 0 + fi + log_warn "Node.js $found_ver is older than v20 โ€” installing managed Node." + fi + + if [ -x "$NODE_PREFIX/bin/node" ]; then + local found_ver + found_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?") + export PATH="$NODE_PREFIX/bin:$PATH" + log_success "Node.js $found_ver found (Hermes-managed at $NODE_PREFIX)" + return 0 + fi + + log_info "Installing Node.js $NODE_VERSION LTS into $NODE_PREFIX ..." 
+ + local index_url="https://nodejs.org/dist/latest-v${NODE_VERSION}.x/" + local tarball_name + tarball_name=$(curl -fsSL "$index_url" \ + | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.xz" \ + | head -1) + + if [ -z "$tarball_name" ]; then + tarball_name=$(curl -fsSL "$index_url" \ + | grep -oE "node-v${NODE_VERSION}\.[0-9]+\.[0-9]+-${NODE_OS}-${NODE_ARCH}\.tar\.gz" \ + | head -1) + fi + + if [ -z "$tarball_name" ]; then + log_error "Could not locate Node.js $NODE_VERSION tarball for $NODE_OS-$NODE_ARCH" + log_info "Install Node 20+ manually: https://nodejs.org/en/download/" + return 1 + fi + + local tmp_dir + tmp_dir=$(mktemp -d) + trap 'rm -rf "$tmp_dir"' RETURN + + log_info "Downloading $tarball_name ..." + if ! curl -fsSL "${index_url}${tarball_name}" -o "$tmp_dir/$tarball_name"; then + log_error "Node.js download failed" + return 1 + fi + + if [[ "$tarball_name" == *.tar.xz ]]; then + tar xf "$tmp_dir/$tarball_name" -C "$tmp_dir" + else + tar xzf "$tmp_dir/$tarball_name" -C "$tmp_dir" + fi + + local extracted_dir + extracted_dir=$(ls -d "$tmp_dir"/node-v* 2>/dev/null | head -1) + if [ ! 
-d "$extracted_dir" ]; then + log_error "Node.js extraction failed" + return 1 + fi + + mkdir -p "$HERMES_HOME" + rm -rf "$NODE_PREFIX" + mv "$extracted_dir" "$NODE_PREFIX" + + export PATH="$NODE_PREFIX/bin:$PATH" + + local installed_ver + installed_ver=$("$NODE_PREFIX/bin/node" --version 2>/dev/null || echo "?") + log_success "Node.js $installed_ver installed to $NODE_PREFIX" +} + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Step 2: agent-browser + @askjo/camofox-browser via global npm install +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +ensure_agent_browser() { + if ! command -v npm >/dev/null 2>&1; then + log_error "npm not on PATH after Node install โ€” aborting" + return 1 + fi + + # _find_agent_browser() in tools/browser_tool.py walks ~/.hermes/node/bin + # plus a few standard prefixes, so installing globally into the managed + # Node prefix is enough โ€” no PATH manipulation needed from the agent side. + if [ -x "$NODE_PREFIX/bin/agent-browser" ] || command -v agent-browser >/dev/null 2>&1; then + log_success "agent-browser already installed" + return 0 + fi + + # When the system's `npm` resolves to a root-owned prefix (e.g. + # /usr/lib/node_modules), `npm install -g` fails with EACCES without + # sudo. Force the prefix to the user-writable Hermes-managed Node + # directory so we never need sudo and the agent can always find the + # result. If we installed Node ourselves above, this is a no-op + # (managed Node already uses $NODE_PREFIX). If the user has system + # Node, we still drop agent-browser under $NODE_PREFIX/bin/ โ€” which + # is exactly where _browser_candidate_path_dirs() looks first. 
+ mkdir -p "$NODE_PREFIX" + + log_info "Installing agent-browser (npm, prefix=$NODE_PREFIX)..." + if ! npm install -g --prefix "$NODE_PREFIX" --silent \ + agent-browser@^0.26.0 \ + "@askjo/camofox-browser@^1.5.2"; then + log_error "npm install -g agent-browser failed" + return 1 + fi + + # macOS/Linux global installs place the shim into $NODE_PREFIX/bin/. + # Add it to PATH for any subsequent steps (npx playwright). + export PATH="$NODE_PREFIX/bin:$PATH" + + log_success "agent-browser installed to $NODE_PREFIX/bin/" +} + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Step 3: Playwright Chromium +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +confirm_chromium_download() { + if [ "$ASSUME_YES" = true ]; then return 0; fi + if [ ! -t 0 ]; then + log_warn "Non-interactive shell โ€” skipping Chromium prompt." + log_info "Re-run with --yes to install Chromium (~400 MB download)." + return 1 + fi + printf "Install Playwright Chromium (~400 MB download)? [y/N] " + local reply="" + read -r reply || reply="" + case "$reply" in + y|Y|yes|YES) return 0 ;; + *) return 1 ;; + esac +} + +# Detect a usable system Chrome/Chromium. agent-browser's Chrome engine can +# use it instead of downloading Playwright's bundled Chromium, saving the +# download cost. Returns the path or empty string. 
+find_system_browser() { + local candidate + for candidate in google-chrome google-chrome-stable chromium chromium-browser chrome; do + if command -v "$candidate" >/dev/null 2>&1; then + command -v "$candidate" + return 0 + fi + done + # macOS app-bundle locations + if [ "$OS" = "macos" ]; then + for candidate in \ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \ + "/Applications/Chromium.app/Contents/MacOS/Chromium" ; do + if [ -x "$candidate" ]; then + echo "$candidate" + return 0 + fi + done + fi + return 1 +} + +write_browser_env() { + local browser_path="$1" + local env_file="$HERMES_HOME/.env" + mkdir -p "$HERMES_HOME" + if [ -f "$env_file" ] && grep -q "^AGENT_BROWSER_EXECUTABLE_PATH=" "$env_file"; then + return 0 + fi + { + echo "" + echo "# Hermes Agent browser tools โ€” use the system Chrome/Chromium binary." + echo "AGENT_BROWSER_EXECUTABLE_PATH=$browser_path" + } >> "$env_file" + log_success "Configured browser tools to use $browser_path" +} + +ensure_chromium() { + if [ "$SKIP_CHROMIUM" = true ]; then + log_info "Skipping Chromium install (--skip-chromium)" + return 0 + fi + + local system_browser + system_browser="$(find_system_browser 2>/dev/null || true)" + if [ -n "$system_browser" ]; then + log_success "Found system browser: $system_browser" + log_info "Skipping Playwright Chromium download; agent-browser will use it." + write_browser_env "$system_browser" + return 0 + fi + + if ! confirm_chromium_download; then + log_info "Chromium install skipped. Browser tools will only work if you" + log_info "set AGENT_BROWSER_EXECUTABLE_PATH or install Chromium later." + return 0 + fi + + if ! command -v npx >/dev/null 2>&1; then + log_error "npx not on PATH โ€” cannot install Playwright Chromium" + return 1 + fi + + log_info "Installing Playwright Chromium (~400 MB) ..." + + # On apt-based distros, --with-deps requires sudo. Try non-interactively + # only โ€” never prompt โ€” and fall back to the bare browser-only install. 
+ local installed=false + if [ "$OS" = "linux" ]; then + case "$DISTRO" in + ubuntu|debian|raspbian|pop|linuxmint|elementary|zorin|kali|parrot) + if [ "$(id -u)" -eq 0 ] || (command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null); then + log_info "Installing system deps with --with-deps (sudo available)" + if npx --yes playwright install --with-deps chromium; then + installed=true + fi + else + log_warn "sudo not available non-interactively โ€” installing Chromium without system deps." + log_info "If browser tools fail to launch, an administrator should run:" + log_info " sudo npx playwright install-deps chromium" + fi + ;; + arch|manjaro|cachyos|endeavouros|garuda) + log_info "Arch-family system dependencies are not auto-installed." + log_info "If launch fails, run: sudo pacman -S nss atk at-spi2-core cups libdrm libxkbcommon mesa pango cairo alsa-lib" + ;; + fedora|rhel|centos|rocky|alma) + log_info "Fedora/RHEL system dependencies are not auto-installed." + log_info "If launch fails, run: sudo dnf install nss atk at-spi2-core cups-libs libdrm libxkbcommon mesa-libgbm pango cairo alsa-lib" + ;; + opensuse*|sles) + log_info "openSUSE system dependencies are not auto-installed." 
+ ;; + esac + fi + + if [ "$installed" = false ]; then + if npx --yes playwright install chromium; then + installed=true + fi + fi + + if [ "$installed" = true ]; then + log_success "Playwright Chromium installed" + else + log_error "Playwright Chromium install failed" + log_info "Try again later: npx --yes playwright install chromium" + return 1 + fi +} + +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Main +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + +main() { + log_info "Hermes Agent: bootstrapping browser tools" + log_info " HERMES_HOME = $HERMES_HOME" + log_info " OS / arch = $NODE_OS-$NODE_ARCH ${DISTRO:+($DISTRO)}" + + ensure_node + ensure_agent_browser + ensure_chromium + + log_success "Browser tools setup complete." + log_info "Hermes Agent will pick up agent-browser from $NODE_PREFIX/bin/ on next launch." +} + +main diff --git a/acp_adapter/entry.py b/acp_adapter/entry.py index cc7f835f7e05..cf5c2ba9cfb0 100644 --- a/acp_adapter/entry.py +++ b/acp_adapter/entry.py @@ -24,6 +24,7 @@ # means UTF-8 stdio setup is skipped on Windows; POSIX is unaffected. 
pass +import argparse import asyncio import logging import sys @@ -107,8 +108,150 @@ def _load_env() -> None: ) -def main() -> None: +def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: + parser = argparse.ArgumentParser( + prog="hermes-acp", + description="Run Hermes Agent as an ACP stdio server.", + ) + parser.add_argument("--version", action="store_true", help="Print Hermes version and exit") + parser.add_argument( + "--check", + action="store_true", + help="Verify ACP dependencies and adapter imports, then exit", + ) + parser.add_argument( + "--setup", + action="store_true", + help="Run interactive Hermes provider/model setup for ACP terminal auth", + ) + parser.add_argument( + "--setup-browser", + action="store_true", + help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ " + "for browser tool support. Idempotent.", + ) + parser.add_argument( + "--yes", + "-y", + action="store_true", + dest="assume_yes", + help="Accept all prompts (currently used by --setup-browser to skip the " + "~400 MB Chromium download confirmation).", + ) + return parser.parse_args(argv) + + +def _print_version() -> None: + from hermes_cli import __version__ as hermes_version + + print(hermes_version) + + +def _run_check() -> None: + import acp # noqa: F401 + from acp_adapter.server import HermesACPAgent # noqa: F401 + + print("Hermes ACP check OK") + + +def _run_setup() -> None: + from hermes_cli.main import main as hermes_main + + old_argv = sys.argv[:] + try: + sys.argv = [old_argv[0] if old_argv else "hermes", "model"] + hermes_main() + finally: + sys.argv = old_argv + + # Offer browser-tools install as a follow-up. The terminal auth method + # is the one supported first-run UX for registry installs, so this is + # the natural moment to ask. Skip silently if stdin isn't a TTY (the + # answer can't be collected anyway). + if not sys.stdin.isatty(): + return + try: + reply = input( + "\nInstall browser tools? 
Downloads agent-browser (npm) and " + "optionally Playwright Chromium (~400 MB). [y/N] " + ).strip().lower() + except (EOFError, KeyboardInterrupt): + return + if reply in {"y", "yes"}: + _run_setup_browser(assume_yes=False) + + +def _run_setup_browser(assume_yes: bool = False) -> int: + """Bootstrap agent-browser + Playwright Chromium for the registry-install path. + + Shells out to the bundled platform-specific bootstrap script + (acp_adapter/bootstrap/bootstrap_browser_tools.{sh,ps1}) so the install + logic lives in one place โ€” readable, debuggable, and shareable with + install.sh / install.ps1 if we ever want to call it from there too. + + Returns the script's exit code (0 on success). + """ + import platform + import subprocess + + bootstrap_dir = Path(__file__).resolve().parent / "bootstrap" + + if platform.system() == "Windows": + script = bootstrap_dir / "bootstrap_browser_tools.ps1" + if not script.is_file(): + print( + f"Bootstrap script not found at {script} โ€” wheel may be incomplete.", + file=sys.stderr, + ) + return 1 + cmd = [ + "powershell.exe", + "-NoProfile", + "-ExecutionPolicy", "Bypass", + "-File", str(script), + ] + if assume_yes: + cmd.append("-Yes") + else: + script = bootstrap_dir / "bootstrap_browser_tools.sh" + if not script.is_file(): + print( + f"Bootstrap script not found at {script} โ€” wheel may be incomplete.", + file=sys.stderr, + ) + return 1 + cmd = ["bash", str(script)] + if assume_yes: + cmd.append("--yes") + + # stdio is inherited so the user sees the bootstrap's progress live. 
+ try: + result = subprocess.run(cmd, check=False) + except FileNotFoundError as exc: + # bash / powershell.exe not on PATH + print(f"Could not launch browser bootstrap: {exc}", file=sys.stderr) + return 1 + return result.returncode + + +def main(argv: list[str] | None = None) -> None: """Entry point: load env, configure logging, run the ACP agent.""" + args = _parse_args(argv) + if args.version: + _print_version() + return + if args.check: + _run_check() + return + if args.setup: + _run_setup() + return + if args.setup_browser: + rc = _run_setup_browser(assume_yes=args.assume_yes) + if rc != 0: + sys.exit(rc) + return + _setup_logging() _load_env() diff --git a/acp_adapter/server.py b/acp_adapter/server.py index c61bb80e471d..20c4d7cdb4fa 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -57,13 +57,7 @@ UserMessageChunk, ) -# AuthMethodAgent was renamed from AuthMethod in agent-client-protocol 0.9.0 -try: - from acp.schema import AuthMethodAgent -except ImportError: - from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined] - -from acp_adapter.auth import detect_provider +from acp_adapter.auth import TERMINAL_SETUP_AUTH_METHOD_ID, build_auth_methods, detect_provider from acp_adapter.events import ( make_message_cb, make_step_cb, @@ -744,16 +738,7 @@ async def initialize( resolved_protocol_version = ( protocol_version if isinstance(protocol_version, int) else acp.PROTOCOL_VERSION ) - provider = detect_provider() - auth_methods = None - if provider: - auth_methods = [ - AuthMethodAgent( - id=provider, - name=f"{provider} runtime credentials", - description=f"Authenticate Hermes using the currently configured {provider} runtime credentials.", - ) - ] + auth_methods = build_auth_methods() client_name = client_info.name if client_info else "unknown" logger.info( @@ -784,10 +769,18 @@ async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateRespo # server has provider credentials configured โ€” harmless under # 
Hermes' threat model (ACP is stdio-only, local-trust), but poor # API hygiene and confusing if ACP ever grows multi-method auth. - provider = detect_provider() - if not provider: + if not isinstance(method_id, str): return None - if not isinstance(method_id, str) or method_id.strip().lower() != provider: + normalized_method = method_id.strip().lower() + provider = detect_provider() + + if normalized_method == TERMINAL_SETUP_AUTH_METHOD_ID: + # Terminal auth launches Hermes setup/model selection out-of-band. + # Only report success once that flow has produced usable runtime + # credentials for the normal ACP session. + return AuthenticateResponse() if provider else None + + if not provider or normalized_method != provider: return None return AuthenticateResponse() diff --git a/acp_registry/agent.json b/acp_registry/agent.json index 492a84445d4e..b94a48e089fd 100644 --- a/acp_registry/agent.json +++ b/acp_registry/agent.json @@ -1,12 +1,16 @@ { - "schema_version": 1, - "name": "hermes-agent", - "display_name": "Hermes Agent", - "description": "AI agent by Nous Research with 90+ tools, persistent memory, and multi-platform support", - "icon": "icon.svg", + "id": "hermes-agent", + "name": "Hermes Agent", + "version": "0.13.0", + "description": "Self-improving open-source AI agent by Nous Research with ACP editor integration, persistent memory, skills, and rich tool support.", + "repository": "https://github.com/NousResearch/hermes-agent", + "website": "https://hermes-agent.nousresearch.com/docs/user-guide/features/acp", + "authors": ["Nous Research"], + "license": "MIT", "distribution": { - "type": "command", - "command": "hermes", - "args": ["acp"] + "uvx": { + "package": "hermes-agent[acp]==0.13.0", + "args": ["hermes-acp"] + } } } diff --git a/acp_registry/icon.svg b/acp_registry/icon.svg index fc08ec051906..f42c0daea458 100644 --- a/acp_registry/icon.svg +++ b/acp_registry/icon.svg @@ -1,25 +1,8 @@ - - - - - - - - - - - - - - - - - - - - - - + + + + + + + diff 
--git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index ee0ec917f5da..96ad615bf6f4 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1456,8 +1456,21 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: nous = _read_nous_auth() runtime = _resolve_nous_runtime_api(force_refresh=False) if runtime is None and not nous: + logger.warning( + "Auxiliary Nous client unavailable: no Nous authentication found " + "(run: hermes auth)." + ) _mark_provider_unhealthy("nous", ttl=60) return None, None + if runtime is None and nous: + # Runtime credential mint failed but stored Nous auth is still present. + # Falls back to the raw stored token below; surface a debug line so + # operators investigating expired/invalid sessions have a breadcrumb, + # without blocking the fallback path the rest of this function relies on. + logger.debug( + "Auxiliary Nous: runtime credential mint failed; falling back to " + "stored auth.json token." + ) global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") diff --git a/agent/context_compressor.py b/agent/context_compressor.py index df75b8b88ce4..e7a14faf51b7 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1429,15 +1429,23 @@ def compress(self, messages: List[Dict[str, Any]], current_tokens: int = None, f return messages turns_to_summarize = messages[compress_start:compress_end] + # A persisted handoff summary can sit in the protected head after a + # resume (commonly immediately after the system prompt). Search from + # the first non-system message through the compression window so we can + # rehydrate iterative-summary state without serializing that handoff as + # a new turn. Protected messages after the handoff remain live context, + # so only summarize messages that are both after the handoff and inside + # the current compression window. 
+ summary_search_start = 1 if messages and messages[0].get("role") == "system" else 0 summary_idx, summary_body = self._find_latest_context_summary( messages, - compress_start, + summary_search_start, compress_end, ) if summary_idx is not None: if summary_body and not self._previous_summary: self._previous_summary = summary_body - turns_to_summarize = messages[summary_idx + 1:compress_end] + turns_to_summarize = messages[max(compress_start, summary_idx + 1):compress_end] if not self.quiet_mode: logger.info( diff --git a/agent/display.py b/agent/display.py index 6c5c970aeff2..cdfc88f46a3b 100644 --- a/agent/display.py +++ b/agent/display.py @@ -240,21 +240,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) - msg = msg[:17] + "..." return f"to {target}: \"{msg}\"" - if tool_name.startswith("rl_"): - rl_previews = { - "rl_list_environments": "listing envs", - "rl_select_environment": args.get("name", ""), - "rl_get_current_config": "reading config", - "rl_edit_config": f"{args.get('field', '')}={args.get('value', '')}", - "rl_start_training": "starting", - "rl_check_status": args.get("run_id", "")[:16], - "rl_stop_training": f"stopping {args.get('run_id', '')[:16]}", - "rl_get_results": args.get("run_id", "")[:16], - "rl_list_runs": "listing runs", - "rl_test_inference": f"{args.get('num_steps', 3)} steps", - } - return rl_previews.get(tool_name) - key = primary_args.get(tool_name) if not key: for fallback_key in ("query", "text", "command", "path", "name", "prompt", "code", "goal"): @@ -981,15 +966,6 @@ def _wrap(line: str) -> str: if action == "list": return _wrap(f"โ”Š โฐ cron listing {dur}") return _wrap(f"โ”Š โฐ cron {action} {args.get('job_id', '')} {dur}") - if tool_name.startswith("rl_"): - rl = { - "rl_list_environments": "list envs", "rl_select_environment": f"select {args.get('name', '')}", - "rl_get_current_config": "get config", "rl_edit_config": f"set {args.get('field', '?')}", - "rl_start_training": "start training", 
"rl_check_status": f"status {args.get('run_id', '?')[:12]}", - "rl_stop_training": f"stop {args.get('run_id', '?')[:12]}", "rl_get_results": f"results {args.get('run_id', '?')[:12]}", - "rl_list_runs": "list runs", "rl_test_inference": "test inference", - } - return _wrap(f"โ”Š ๐Ÿงช rl {rl.get(tool_name, tool_name.replace('rl_', ''))} {dur}") if tool_name == "execute_code": code = args.get("code", "") first_line = code.strip().split("\n")[0] if code.strip() else "" diff --git a/agent/lsp/manager.py b/agent/lsp/manager.py index a0d3eb98c300..34c0b0ba92b4 100644 --- a/agent/lsp/manager.py +++ b/agent/lsp/manager.py @@ -40,7 +40,7 @@ import threading import time from concurrent.futures import Future as ConcurrentFuture -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple from agent.lsp import eventlog from agent.lsp.client import ( @@ -305,6 +305,7 @@ def get_diagnostics_sync( *, delta: bool = True, timeout: Optional[float] = None, + line_shift: Optional[Callable[[int], Optional[int]]] = None, ) -> List[Dict[str, Any]]: """Synchronously open ``file_path`` in the right server, wait for diagnostics, return them. @@ -314,6 +315,18 @@ def get_diagnostics_sync( Diagnostics present in the baseline are removed so the caller only sees errors introduced by the current edit. + When ``line_shift`` is provided, baseline diagnostics are + remapped through it before the set-difference. This handles + the case where the edit deleted or inserted lines, causing + pre-existing diagnostics below the edit point to surface at + different line numbers in the post-edit snapshot โ€” without + the shift, they'd all look "introduced by this edit". Pass + a callable built by + :func:`agent.lsp.range_shift.build_line_shift` (pre_text, + post_text). Omit when pre/post content isn't available; + the unshifted comparison still catches diagnostics that + didn't move. 
+ Returns an empty list when LSP is disabled, when no workspace can be detected, when no server matches, or when the server can't be spawned. Never raises. @@ -344,6 +357,14 @@ def get_diagnostics_sync( if delta: baseline = self._delta_baseline.get(abs_path) or [] if baseline: + if line_shift is not None: + # Remap baseline diagnostics into post-edit + # coordinates so shifted-but-otherwise-identical + # entries hash equal under _diag_key. Entries + # that mapped into a deleted region drop out + # silently โ€” they no longer apply. + from agent.lsp.range_shift import shift_baseline + baseline = shift_baseline(baseline, line_shift) seen = {_diag_key(d) for d in baseline} diags = [d for d in diags if _diag_key(d) not in seen] # Roll baseline forward โ€” next call returns deltas relative @@ -585,8 +606,19 @@ def get_status(self) -> Dict[str, Any]: def _diag_key(d: Dict[str, Any]) -> str: - """Content equality key used for delta filtering. Mirrors - :func:`agent.lsp.client._diagnostic_key`.""" + """Content equality key used for cross-edit delta filtering. + + Includes the diagnostic's position range โ€” when used together + with :func:`agent.lsp.range_shift.shift_baseline`, the baseline + is line-shifted into post-edit coordinates BEFORE this key is + computed, so identical-but-shifted diagnostics hash equal. Two + genuinely distinct diagnostics at different lines (e.g. the same + error class introduced at a second site) hash differently and + are surfaced as new. + + Mirrors :func:`agent.lsp.client._diagnostic_key`; intentionally + identical so the two layers agree on diagnostic identity. + """ rng = d.get("range") or {} start = rng.get("start") or {} end = rng.get("end") or {} diff --git a/agent/lsp/range_shift.py b/agent/lsp/range_shift.py new file mode 100644 index 000000000000..8efdfc309821 --- /dev/null +++ b/agent/lsp/range_shift.py @@ -0,0 +1,149 @@ +"""Diff-aware line-shift map for cross-edit LSP delta filtering. 
+ +When an edit deletes or inserts lines in the middle of a file, every +diagnostic below the edit point shifts to a new line number. The +LSPService delta filter subtracts the pre-edit baseline from the +post-edit diagnostics keyed on ``(severity, code, source, message, +range)`` — without an adjustment, the shifted-but-otherwise-identical +diagnostics look brand-new and the agent gets flooded with noise. + +The fix used here is the same trick git's blame and unified diff use: +build a piecewise-linear map from pre-edit line numbers to post-edit +line numbers, then apply that map to baseline diagnostics before the +set-difference. Diagnostics whose pre-edit line is in a region the +edit deleted or replaced return ``None`` and are dropped from the baseline (they +genuinely no longer apply). + +Trade-off vs. dropping range from the key entirely (the previous +fix): preserves the "new instance of an identical error at a +different line" signal — if the model introduces a second instance +of the same error class at a different location, that one will be +surfaced as new instead of swallowed by content-only dedup. + +The map is derived from ``difflib.SequenceMatcher.get_opcodes()`` and +exposed as a single callable so callers don't have to reason about +diff regions. +""" +from __future__ import annotations + +import difflib +from typing import Any, Callable, Dict, List, Optional + + +def build_line_shift(pre_text: str, post_text: str) -> Callable[[int], Optional[int]]: + """Build a function mapping pre-edit line numbers to post-edit line numbers. + + Lines are 0-indexed to match the LSP wire format + (``range.start.line`` is 0-indexed). + + The returned callable takes a pre-edit 0-indexed line number and + returns the corresponding post-edit 0-indexed line number, or + ``None`` if that line was deleted or replaced by the edit (no post-edit + counterpart exists). 
+ + Cost: one ``SequenceMatcher.get_opcodes()`` call up front; the + returned closure is O(k) per call (linear scan over the k opcode + regions). Cheap enough to call once per write/patch and apply to + every baseline diagnostic. + """ + pre_lines = pre_text.splitlines() if pre_text else [] + post_lines = post_text.splitlines() if post_text else [] + + # Trivial case: identical content or no content — identity map. + if pre_lines == post_lines: + return lambda line: line + + # SequenceMatcher.get_opcodes() returns a list of + # (tag, i1, i2, j1, j2) where tag is 'equal', 'replace', 'delete', + # or 'insert'. i1:i2 is the range in pre, j1:j2 is the range in + # post. The opcode list is used as-is and scanned in order + # by i for each lookup. + sm = difflib.SequenceMatcher(a=pre_lines, b=post_lines, autojunk=False) + opcodes = sm.get_opcodes() + + def shift(line: int) -> Optional[int]: + # Find the opcode region whose i1 <= line < i2. + # Linear scan is fine — typical opcode count is small (single + # digits for a typical patch-tool edit). + for tag, i1, i2, j1, j2 in opcodes: + if i1 <= line < i2: + if tag == "equal": + # Pre-line N → post-line (N - i1 + j1). + return line - i1 + j1 + if tag == "delete": + # Pre-line is in a deleted region — no post counterpart. + return None + if tag == "replace": + # Replace == delete + insert; the pre-line has no + # post counterpart in any meaningful sense. Drop. + return None + # 'insert' has i1 == i2 so line < i2 can't be hit. + if line < i1: + # Past the relevant region — handled in earlier iteration. + break + # Past the last opcode region (line >= len(pre_lines)). + # Anchor at end of post. + return max(0, len(post_lines) - 1) if post_lines else None + + return shift + + +def shift_diagnostic_range(diag: Dict[str, Any], + shift: Callable[[int], Optional[int]]) -> Optional[Dict[str, Any]]: + """Return a copy of ``diag`` with its line range remapped through ``shift``. 
+ + Returns ``None`` if the diagnostic's start line maps to ``None`` + (the line was deleted by the edit) โ€” caller drops it from the + baseline since the diagnostic no longer applies. + + Both ``start.line`` and ``end.line`` are remapped independently; + when only the end maps to ``None`` (rare, multi-line diagnostic + straddling the edit boundary) we collapse to a single-line range + at the shifted start to keep the diagnostic in the baseline. + + The original ``diag`` is not mutated. + """ + rng = diag.get("range") or {} + start = rng.get("start") or {} + end = rng.get("end") or {} + + pre_start_line = int(start.get("line", 0)) + pre_end_line = int(end.get("line", pre_start_line)) + + new_start_line = shift(pre_start_line) + if new_start_line is None: + return None + + new_end_line = shift(pre_end_line) + if new_end_line is None: + # Diagnostic straddled the deletion โ€” collapse to start. + new_end_line = new_start_line + + shifted = dict(diag) + shifted["range"] = { + "start": { + "line": new_start_line, + "character": int(start.get("character", 0)), + }, + "end": { + "line": new_end_line, + "character": int(end.get("character", 0)), + }, + } + return shifted + + +def shift_baseline(baseline: List[Dict[str, Any]], + shift: Callable[[int], Optional[int]]) -> List[Dict[str, Any]]: + """Apply ``shift`` to every diagnostic in ``baseline``, dropping deleted entries.""" + out: List[Dict[str, Any]] = [] + for d in baseline: + if not isinstance(d, dict): + continue + shifted = shift_diagnostic_range(d, shift) + if shifted is not None: + out.append(shifted) + return out + + +__all__ = ["build_line_shift", "shift_diagnostic_range", "shift_baseline"] diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 6bd36387835d..8ff435f8b91e 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -457,6 +457,13 @@ def _strip_yaml_frontmatter(content: str) -> str: "files arrive as downloadable documents. 
You can also include image " "URLs in markdown format ![alt](url) and they will be sent as photos." ), + "tlon": ( + "You are on Tlon, a decentralized messaging platform built on Urbit. " + "Keep replies conversational and concise. Basic Markdown formatting is okay, " + "but avoid tables and overly complex layout because messages render inside chat. " + "Incoming Tlon blobs are converted into readable attachment annotations and, " + "when safe to fetch, local media/document paths for tool access." + ), "email": ( "You are communicating via email. Write clear, well-structured responses " "suitable for email. Use plain text formatting (no markdown). " diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 13d9ad9c4206..f5fb71563806 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -457,7 +457,7 @@ prompt_caching: # Two stores: MEMORY.md (agent's notes) and USER.md (user profile). # Character limits keep the memory small and focused. The agent manages # pruning -- when at the limit, it must consolidate or replace entries. -# Disabled by default in batch_runner and RL environments. +# Disabled by default in batch_runner. 
# memory: # Agent's personal notes: environment facts, conventions, things learned @@ -681,6 +681,16 @@ platform_toolsets: # # allowed_chats: ["-1001234567890"] # extra: # disable_link_previews: false # Set true to suppress Telegram URL previews in bot messages +# +# Discord-specific settings (config.yaml top-level, not under platforms:): +# +# discord: +# require_mention: true # Require @mention in server channels (default: true) +# auto_thread: true # Auto-create thread on @mention (default: true) +# free_response_channels: "" # Channel IDs where no mention is needed +# reactions: true # Show processing reactions (default: true) +# history_backfill: true # Recover missed channel messages on mention (default: true) +# history_backfill_limit: 50 # Max messages to scan backwards (default: 50) # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ # Available toolsets (use these names in platform_toolsets or the toolsets list) @@ -705,10 +715,9 @@ platform_toolsets: # todo - todo (in-memory task planning, no deps) # tts - text_to_speech (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX/MISTRAL key) # cronjob - cronjob (create/list/update/pause/resume/run/remove scheduled tasks) -# rl - rl_list_environments, rl_start_training, etc. 
(requires TINKER_API_KEY) # # PRESETS (curated bundles): -# hermes-cli - All of the above except rl + send_message +# hermes-cli - All of the above except send_message # hermes-telegram - terminal, file, web, vision, image_gen, tts, browser, # skills, todo, cronjob, send_message # hermes-discord - Same as hermes-telegram @@ -734,7 +743,6 @@ platform_toolsets: # session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization) # tts - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax, Mistral) # cronjob - Schedule and manage automated tasks (CLI-only) -# rl - RL training tools (Tinker-Atropos) # # Composite toolsets: # debugging - terminal + web + file (for troubleshooting) diff --git a/cli.py b/cli.py index 75506adc655d..50e7a8c8ce9a 100644 --- a/cli.py +++ b/cli.py @@ -1242,7 +1242,13 @@ def _prune_orphaned_branches(repo_root: str) -> None: def _hex_to_ansi(hex_color: str, *, bold: bool = False) -> str: - """Convert a hex color like '#268bd2' to a true-color ANSI escape.""" + """Convert a hex color like '#268bd2' to a true-color ANSI escape. + + Auto-remaps known dark-mode-tuned colors to readable light-mode + equivalents when running on a light terminal (see + _maybe_remap_for_light_mode + _LIGHT_MODE_REMAP). + """ + hex_color = _maybe_remap_for_light_mode(hex_color) try: r = int(hex_color[1:3], 16) g = int(hex_color[3:5], 16) @@ -1253,6 +1259,250 @@ def _hex_to_ansi(hex_color: str, *, bold: bool = False) -> str: return _ACCENT_ANSI_DEFAULT if bold else "\033[38;2;184;134;11m" +# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +# Light/dark terminal mode detection. +# +# Mirrors ui-tui/src/theme.ts detectLightMode(). Used to decide whether +# to remap "near-white" skin colors (e.g. 
#FFF8DC banner_text, #B8860B +# banner_dim) to darker equivalents that are readable on a light +# Terminal.app / iTerm2 background. +# +# Detection priority: +# 1. HERMES_LIGHT / HERMES_TUI_LIGHT env (true/false) โ€” explicit override +# 2. HERMES_TUI_THEME=light|dark โ€” explicit theme +# 3. HERMES_TUI_BACKGROUND=#RRGGBB โ€” explicit bg hint +# 4. COLORFGBG env (set by xterm/Konsole/urxvt) โ€” bg slot 7/15 = light +# 5. OSC 11 query (\x1b]11;?\x1b\\) โ€” ask the terminal directly +# 6. Default: assume dark (matches the legacy Hermes assumption) +# +# Cached after first call so we don't query the terminal repeatedly. +_LIGHT_MODE_CACHE: bool | None = None +_TRUE_RE = re.compile(r"^(1|true|on|yes|y)$") +_FALSE_RE = re.compile(r"^(0|false|off|no|n)$") +_LIGHT_DEFAULT_TERM_PROGRAMS = frozenset() # Apple_Terminal doesn't reliably indicate; require explicit + + +def _luminance_from_hex(hex_str: str) -> float | None: + s = (hex_str or "").strip().lstrip("#") + if len(s) == 3: + s = "".join(c * 2 for c in s) + if len(s) != 6 or not all(c in "0123456789abcdefABCDEF" for c in s): + return None + try: + r, g, b = int(s[0:2], 16), int(s[2:4], 16), int(s[4:6], 16) + except ValueError: + return None + # Rec.709 luma + return (0.2126 * r + 0.7152 * g + 0.0722 * b) / 255.0 + + +def _query_osc11_background() -> str | None: + """Ask the terminal for its background color via OSC 11. + + Most modern terminals reply with \x1b]11;rgb:RRRR/GGGG/BBBB\x1b\\ + within a few ms. We wait up to 100ms total before giving up. + Returns "#RRGGBB" or None on timeout / non-tty. 
+ """ + if not sys.stdin.isatty() or not sys.stdout.isatty(): + return None + try: + import termios + import tty + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + except Exception: + return None + try: + try: + tty.setcbreak(fd) + except Exception: + return None + try: + sys.stdout.write("\x1b]11;?\x1b\\") + sys.stdout.flush() + except Exception: + return None + # Read up to ~50ms for the response + import select + deadline = time.monotonic() + 0.1 + buf = b"" + while time.monotonic() < deadline: + r, _, _ = select.select([fd], [], [], deadline - time.monotonic()) + if not r: + continue + try: + chunk = os.read(fd, 64) + except OSError: + break + if not chunk: + break + buf += chunk + if b"\x1b\\" in buf or b"\x07" in buf: + break + # Parse: \x1b]11;rgb:RRRR/GGGG/BBBB\x1b\\ + m = re.search(rb"rgb:([0-9a-fA-F]+)/([0-9a-fA-F]+)/([0-9a-fA-F]+)", buf) + if not m: + return None + # Each component is 1-4 hex digits โ€” normalize to 8-bit + def norm(h: bytes) -> int: + v = int(h, 16) + # Scale to 0-255 based on hex length + bits = len(h) * 4 + return (v * 255) // ((1 << bits) - 1) if bits else 0 + r, g, b = norm(m.group(1)), norm(m.group(2)), norm(m.group(3)) + return f"#{r:02X}{g:02X}{b:02X}" + finally: + try: + termios.tcsetattr(fd, termios.TCSANOW, old) + except Exception: + pass + + +def _detect_light_mode() -> bool: + global _LIGHT_MODE_CACHE + if _LIGHT_MODE_CACHE is not None: + return _LIGHT_MODE_CACHE + result = False + try: + # 1. Explicit env override + for var in ("HERMES_LIGHT", "HERMES_TUI_LIGHT"): + v = (os.environ.get(var) or "").strip().lower() + if _TRUE_RE.match(v): + result = True + _LIGHT_MODE_CACHE = result + return result + if _FALSE_RE.match(v): + _LIGHT_MODE_CACHE = result + return result + # 2. Theme hint + theme = (os.environ.get("HERMES_TUI_THEME") or "").strip().lower() + if theme == "light": + result = True + _LIGHT_MODE_CACHE = result + return result + if theme == "dark": + _LIGHT_MODE_CACHE = result + return result + # 3. 
Explicit bg hex + bg_hint = os.environ.get("HERMES_TUI_BACKGROUND") or "" + bg_lum = _luminance_from_hex(bg_hint) + if bg_lum is not None: + result = bg_lum >= 0.5 + _LIGHT_MODE_CACHE = result + return result + # 4. COLORFGBG (xterm/Konsole/urxvt) + cfgbg = (os.environ.get("COLORFGBG") or "").strip() + if cfgbg: + last = cfgbg.split(";")[-1] if ";" in cfgbg else cfgbg + if last.isdigit(): + bg = int(last) + if bg in (7, 15): + result = True + _LIGHT_MODE_CACHE = result + return result + if 0 <= bg < 16: + _LIGHT_MODE_CACHE = result + return result + # 5. OSC 11 query (best-effort, only when stdin/stdout are TTY) + bg_color = _query_osc11_background() + if bg_color: + lum = _luminance_from_hex(bg_color) + if lum is not None: + result = lum >= 0.5 + _LIGHT_MODE_CACHE = result + return result + # 6. TERM_PROGRAM allow-list (currently empty) + tp = (os.environ.get("TERM_PROGRAM") or "").strip() + if tp in _LIGHT_DEFAULT_TERM_PROGRAMS: + result = True + except Exception: + result = False + _LIGHT_MODE_CACHE = result + return result + + +# Light-mode equivalents of skin colors that are unreadable on cream +# Terminal.app backgrounds. Used by _SkinAwareAnsi to remap colors +# at resolution time when light mode is detected. +# +# IMPORTANT: only remap colors that are used as STANDALONE foregrounds +# on the terminal's background. Don't remap colors that are paired +# with a dark bg (e.g. status bar text on bg:#1a1a2e) โ€” those would +# become invisible the OTHER direction (dark gray on dark navy). 
+_LIGHT_MODE_REMAP: dict[str, str] = { + # Original (dark-mode) -> Light-mode replacement (darker, readable) + "#FFF8DC": "#1A1A1A", # cornsilk -> near-black + "#FFD700": "#9A6B00", # gold -> dark goldenrod (readable on cream) + "#FFBF00": "#8A5A00", # amber -> dark amber + "#B8860B": "#5C4500", # dark goldenrod -> deeper brown (more contrast) + "#DAA520": "#6B4F00", # goldenrod -> dark olive + "#F1E6CF": "#1A1A1A", # cream -> near-black + "#c9d1d9": "#24292F", # github-light fg + "#EAF7FF": "#0F1B26", # ice + "#F5F5F5": "#1A1A1A", + "#FFF0D4": "#1A1A1A", + "#CD7F32": "#8A4F1A", # bronze -> darker bronze + "#FFEFB5": "#3A2A00", + # NOTE: skipping #C0C0C0/#888888/#555555/#8B8682 โ€” those are + # status-bar foregrounds paired with dark navy bg, where dark + # remap values would become invisible. +} + + +def _maybe_remap_for_light_mode(hex_color: str) -> str: + """If we're in light mode, remap a dark-mode-tuned color to a + higher-contrast equivalent. No-op in dark mode.""" + if not _detect_light_mode(): + return hex_color + if not hex_color or not hex_color.startswith("#"): + return hex_color + # Case-insensitive lookup + upper = hex_color.upper() + if upper in _LIGHT_MODE_REMAP_UPPER: + return _LIGHT_MODE_REMAP_UPPER[upper] + return hex_color + + +# Pre-uppercased lookup table for case-insensitive remapping +_LIGHT_MODE_REMAP_UPPER = {k.upper(): v for k, v in _LIGHT_MODE_REMAP.items()} + + +def _install_skin_light_mode_hook() -> None: + """Wrap SkinConfig.get_color at import time so EVERY skin color read goes + through the light-mode remap. 
Idempotent.""" + try: + from hermes_cli.skin_engine import SkinConfig # type: ignore[import] + except Exception: + return + if getattr(SkinConfig, "_hermes_light_mode_hook_installed", False): + return + _orig_get_color = SkinConfig.get_color + + def _wrapped_get_color(self, key, fallback=""): + value = _orig_get_color(self, key, fallback) + try: + return _maybe_remap_for_light_mode(value) + except Exception: + return value + + SkinConfig.get_color = _wrapped_get_color # type: ignore[method-assign] + SkinConfig._hermes_light_mode_hook_installed = True # type: ignore[attr-defined] + + +_install_skin_light_mode_hook() + + +# Prime the light-mode detection cache early (at module load) when +# we're running interactively so OSC 11 happens before pt grabs the +# tty. Skip for non-tty contexts (subagents, gateway, tests). +try: + if sys.stdin.isatty() and sys.stdout.isatty(): + _detect_light_mode() +except Exception: + pass + + + class _SkinAwareAnsi: """Lazy ANSI escape that resolves from the skin engine on first use. @@ -1290,7 +1540,12 @@ def reset(self) -> None: _ACCENT = _SkinAwareAnsi("response_border", "#FFD700", bold=True) -_DIM = _SkinAwareAnsi("banner_dim", "#B8860B") +# Use ANSI dim+italic attributes (\x1b[2;3m) instead of a hardcoded +# hex color so dim/thinking text inherits the terminal's default +# foreground color and stays readable in both light and dark +# Terminal.app modes. Hardcoded skin colors like #B8860B +# (dark goldenrod) become invisible against light cream backgrounds. +_DIM = "\x1b[2;3m" def _accent_hex() -> str: @@ -1710,43 +1965,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None: return resolved -def _format_process_notification(evt: dict) -> "str | None": - """Format a process notification event into a [IMPORTANT: ...] message. - Handles both completion events (notify_on_complete) and watch pattern - match events from the unified completion_queue. 
- """ - evt_type = evt.get("type", "completion") - _sid = evt.get("session_id", "unknown") - _cmd = evt.get("command", "unknown") - - if evt_type == "watch_disabled": - return f"[IMPORTANT: {evt.get('message', '')}]" - - if evt_type == "watch_match": - _pat = evt.get("pattern", "?") - _out = evt.get("output", "") - _sup = evt.get("suppressed", 0) - text = ( - f"[IMPORTANT: Background process {_sid} matched " - f"watch pattern \"{_pat}\".\n" - f"Command: {_cmd}\n" - f"Matched output:\n{_out}" - ) - if _sup: - text += f"\n({_sup} earlier matches were suppressed by rate limit)" - text += "]" - return text - - # Default: completion event - _exit = evt.get("exit_code", "?") - _out = evt.get("output", "") - return ( - f"[IMPORTANT: Background process {_sid} completed " - f"(exit code {_exit}).\n" - f"Command: {_cmd}\n" - f"Output:\n{_out}]" - ) def _detect_file_drop(user_input: str) -> "dict | None": @@ -2980,25 +3199,27 @@ def _use_minimal_tui_chrome(self, width: Optional[int] = None) -> bool: @staticmethod def _scrollback_box_width(width: Optional[int] = None) -> int: - """Return a resize-safe width for printed scrollback box rules. - - Lines already printed to terminal scrollback are reflowed by the - terminal emulator when the column count shrinks. A full-width response - border drawn at, say, 200 columns will wrap into two or three rows of - dashes after the user resizes to 80 columns, looking like duplicated - separator lines (the family of bugs tracked by #18449, #19280, #22976). - - Keep decorative scrollback boxes intentionally narrower than the - viewport so a moderate resize never triggers reflow. The live TUI - footer (status bar, input rule) still uses the full width โ€” only - content that is *stamped into scrollback* needs this clamp. + """Return the full viewport width for printed scrollback box rules. + + Previously this clamped to ``max(32, min(width, 56))`` as a defense + against terminal-emulator reflow on column-shrink (#25975, salvaging + #24403). 
That clamp made response/reasoning borders look stubby on + any modern wide terminal. We now trust the prompt_toolkit + ``_output_screen_diff`` monkey-patch landed in #26137 (salvaging + #25981) to keep chrome out of scrollback in the first place, and + accept that an aggressive column-shrink may visually reflow already + printed Panel borders โ€” that's a cosmetic artifact of stamped + scrollback history, not a live-render bug. + + A small floor (32 cols) is kept so the box still renders on tiny + terminals without negative ``'โ”€' * (w - 2)`` math. """ if width is None: try: width = shutil.get_terminal_size((80, 24)).columns except Exception: width = 80 - return max(32, min(int(width or 80), 56)) + return max(32, int(width or 80)) def _tui_input_rule_height(self, position: str, width: Optional[int] = None) -> int: """Return the visible height for the top/bottom input separator rules.""" @@ -3113,8 +3334,11 @@ def _build_status_bar_text(self, width: Optional[int] = None) -> str: percent_label = f"{percent}%" if percent is not None else "--" duration_label = snapshot["duration"] + yolo_active = bool(os.getenv("HERMES_YOLO_MODE")) if width < 52: text = f"โš• {snapshot['model_short']} ยท {duration_label}" + if yolo_active: + text += " ยท โš  YOLO" return self._trim_status_bar_text(text, width) if width < 76: parts = [f"โš• {snapshot['model_short']}", percent_label] @@ -3122,6 +3346,8 @@ def _build_status_bar_text(self, width: Optional[int] = None) -> str: if compressions: parts.append(f"๐Ÿ—œ๏ธ {compressions}") parts.append(duration_label) + if yolo_active: + parts.append("โš  YOLO") return self._trim_status_bar_text(" ยท ".join(parts), width) if snapshot["context_length"]: @@ -3139,6 +3365,8 @@ def _build_status_bar_text(self, width: Optional[int] = None) -> str: prompt_elapsed = snapshot.get("prompt_elapsed") if prompt_elapsed: parts.append(prompt_elapsed) + if yolo_active: + parts.append("โš  YOLO") return self._trim_status_bar_text(" โ”‚ ".join(parts), width) 
except Exception: return f"โš• {self.model if getattr(self, 'model', None) else 'Hermes'}" @@ -3155,6 +3383,7 @@ def _get_status_bar_fragments(self): # line and produce duplicated status bar rows over long sessions. width = self._get_tui_terminal_width() duration_label = snapshot["duration"] + yolo_active = bool(os.getenv("HERMES_YOLO_MODE")) if width < 52: frags = [ @@ -3162,8 +3391,11 @@ def _get_status_bar_fragments(self): ("class:status-bar-strong", snapshot["model_short"]), ("class:status-bar-dim", " ยท "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ] + if yolo_active: + frags.append(("class:status-bar-dim", " ยท ")) + frags.append(("class:status-bar-yolo", "โš  YOLO")) + frags.append(("class:status-bar", " ")) else: percent = snapshot["context_percent"] percent_label = f"{percent}%" if percent is not None else "--" @@ -3181,8 +3413,11 @@ def _get_status_bar_fragments(self): frags.extend([ ("class:status-bar-dim", " ยท "), ("class:status-bar-dim", duration_label), - ("class:status-bar", " "), ]) + if yolo_active: + frags.append(("class:status-bar-dim", " ยท ")) + frags.append(("class:status-bar-yolo", "โš  YOLO")) + frags.append(("class:status-bar", " ")) else: if snapshot["context_length"]: ctx_total = _format_context_length(snapshot["context_length"]) @@ -3215,6 +3450,9 @@ def _get_status_bar_fragments(self): if prompt_elapsed: frags.append(("class:status-bar-dim", " โ”‚ ")) frags.append(("class:status-bar-dim", prompt_elapsed)) + if yolo_active: + frags.append(("class:status-bar-dim", " โ”‚ ")) + frags.append(("class:status-bar-yolo", "โš  YOLO")) frags.append(("class:status-bar", " ")) total_width = sum(self._status_bar_display_width(text) for _, text in frags) @@ -5961,6 +6199,38 @@ def _handle_resume_command(self, cmd_original: str) -> None: else: _cprint(f" โ†ป Resumed session {target_id}{title_part} โ€” no messages, starting fresh.") + def _handle_sessions_command(self, cmd_original: str) -> None: + """Handle /sessions 
[list|] โ€” browse or resume previous sessions. + + Without arguments, prints the same recent-sessions table that /resume + shows when called without a target, and tells the user how to resume. + With an explicit subcommand or target, delegates to the resume flow so + ``/sessions `` and ``/resume `` behave identically. + + The TUI ships an interactive picker overlay for this command; the + classic CLI prints an inline list because there is no equivalent + overlay primitive here. Without this handler the canonical name + ``sessions`` falls through ``process_command``'s elif chain and + prints ``Unknown command: sessions`` even though the command is + registered in the central COMMAND_REGISTRY. + """ + parts = cmd_original.split(None, 1) + arg = parts[1].strip() if len(parts) > 1 else "" + sub = arg.lower() + + # Bare /sessions or /sessions list โ€” show recent sessions inline. + if not arg or sub in {"list", "ls", "browse"}: + if not self._session_db: + from hermes_state import format_session_db_unavailable + _cprint(f" {format_session_db_unavailable()}") + return + if not self._show_recent_sessions(reason="sessions"): + _cprint(" (._.) No previous sessions yet.") + return + + # /sessions behaves the same as /resume . + self._handle_resume_command(f"/resume {arg}") + def _handle_branch_command(self, cmd_original: str) -> None: """Handle /branch [name] โ€” fork the current session into a new independent copy. 
@@ -7540,6 +7810,8 @@ def process_command(self, command: str) -> bool: self.new_session(title=title) elif canonical == "resume": self._handle_resume_command(cmd_original) + elif canonical == "sessions": + self._handle_sessions_command(cmd_original) elif canonical == "model": self._handle_model_switch(cmd_original) elif canonical == "codex-runtime": @@ -7913,8 +8185,8 @@ def _bg_thinking(text: str) -> None: from hermes_cli.skin_engine import get_active_skin _skin = get_active_skin() label = _skin.get_branding("response_label", "โš• Hermes") - _resp_color = _skin.get_color("response_border", "#CD7F32") - _resp_text = _skin.get_color("banner_text", "#FFF8DC") + _resp_color = _maybe_remap_for_light_mode(_skin.get_color("response_border", "#CD7F32")) + _resp_text = _maybe_remap_for_light_mode(_skin.get_color("banner_text", "#FFF8DC")) except Exception: label = "โš• Hermes" _resp_color = "#CD7F32" @@ -8515,7 +8787,8 @@ def _handle_skin_command(self, cmd: str): set_active_skin(new_skin) _ACCENT.reset() # Re-resolve ANSI color for the new skin - _DIM.reset() # Re-resolve dim/secondary ANSI color for the new skin + # _DIM is now a fixed dim+italic ANSI escape (terminal-default fg) + # so it doesn't need re-resolving on skin switch. 
if save_config_value("display.skin", new_skin): print(f" Skin set to: {new_skin} (saved)") else: @@ -10894,12 +11167,12 @@ def run_agent(): from hermes_cli.skin_engine import get_active_skin _skin = get_active_skin() label = _skin.get_branding("response_label", "โš• Hermes") - _resp_color = _skin.get_color("response_border", "#CD7F32") - _resp_text = _skin.get_color("banner_text", "#FFF8DC") + _resp_color = _maybe_remap_for_light_mode(_skin.get_color("response_border", "#CD7F32")) + _resp_text = _maybe_remap_for_light_mode(_skin.get_color("banner_text", "#FFF8DC")) except Exception: label = "โš• Hermes" - _resp_color = "#CD7F32" - _resp_text = "#FFF8DC" + _resp_color = _maybe_remap_for_light_mode("#CD7F32") + _resp_text = _maybe_remap_for_light_mode("#FFF8DC") is_error_response = result and (result.get("failed") or result.get("partial")) already_streamed = self._stream_started and self._stream_box_opened and not is_error_response @@ -11138,13 +11411,48 @@ def _get_tui_prompt_text(self) -> str: return "".join(text for _, text in self._get_tui_prompt_fragments()) def _build_tui_style_dict(self) -> dict[str, str]: - """Layer the active skin's prompt_toolkit colors over the base TUI style.""" + """Layer the active skin's prompt_toolkit colors over the base TUI style. + + Also rewrites any hex-color tokens in the resulting style strings + to their light-mode equivalents (via _LIGHT_MODE_REMAP) when the + terminal is detected as light. This makes the chrome readable + on cream Terminal.app backgrounds without per-skin overrides. + """ style_dict = dict(getattr(self, "_tui_style_base", {}) or {}) try: from hermes_cli.skin_engine import get_prompt_toolkit_style_overrides style_dict.update(get_prompt_toolkit_style_overrides()) except Exception: pass + # Light-mode remap on the style strings. Each value is a pt + # style string like "bg:#1a1a2e #C0C0C0 bold" โ€” split on space, + # rewrite any "#XXX" tokens (including "bg:#XXX") through the + # light-mode remap, rejoin. 
+ # + # CRITICAL: skip the remap entirely when a style string already + # specifies its own bg (e.g. status-bar / completion-menu styles + # with `bg:#1a1a2e ...`). Those colors were tuned for that + # specific dark bg and remapping the FG to a dark equivalent + # would produce dark-on-dark (invisible). The terminal's BG + # mode is irrelevant โ€” what matters is the bg the style itself + # paints. + try: + if _detect_light_mode(): + def _remap_value(v: str) -> str: + if not v: + return v + tokens = v.split() + has_explicit_bg = any(t.startswith("bg:") for t in tokens) + if has_explicit_bg: + # The style paints its own bg โ€” leave its fg alone. + return v + return " ".join( + _maybe_remap_for_light_mode(t) if t.startswith("#") else t + for t in tokens + ) + style_dict = {k: _remap_value(v or "") for k, v in style_dict.items()} + except Exception: + pass return style_dict def _apply_tui_skin_style(self) -> bool: @@ -11230,6 +11538,13 @@ def _build_tui_layout_children( def run(self): """Run the interactive CLI loop with persistent input at bottom.""" + # Detect light/dark terminal mode now (before pt grabs the tty). + # Caches the result so subsequent _hex_to_ansi / style calls + # don't risk re-querying mid-render. + try: + _detect_light_mode() + except Exception: + pass # Push the entire TUI to the bottom of the terminal so the banner, # responses, and prompt all appear pinned to the bottom โ€” empty # space stays above, not below. This prints enough blank lines to @@ -12993,11 +13308,16 @@ def _get_voice_status(): # Style for the application self._tui_style_base = { - 'input-area': '#FFF8DC', - 'placeholder': '#555555 italic', - 'prompt': '#FFF8DC', + # Input area / prompt: empty style strings inherit the + # terminal's default foreground/background, so the typed + # text is readable in both light and dark Terminal.app + # color schemes. (Hardcoding a near-white #FFF8DC made + # input invisible on light backgrounds.) 
+ 'input-area': '', + 'placeholder': '#888888 italic', + 'prompt': '', 'prompt-working': '#888888 italic', - 'hint': '#555555 italic', + 'hint': '#888888 italic', 'status-bar': 'bg:#1a1a2e #C0C0C0', 'status-bar-strong': 'bg:#1a1a2e #FFD700 bold', 'status-bar-dim': 'bg:#1a1a2e #8B8682', @@ -13005,6 +13325,7 @@ def _get_voice_status(): 'status-bar-warn': 'bg:#1a1a2e #FFD700 bold', 'status-bar-bad': 'bg:#1a1a2e #FF8C00 bold', 'status-bar-critical': 'bg:#1a1a2e #FF6B6B bold', + 'status-bar-yolo': 'bg:#1a1a2e #FF4444 bold', # Bronze horizontal rules around the input area 'input-rule': '#CD7F32', # Clipboard image attachment badges @@ -13056,19 +13377,70 @@ def _get_voice_status(): self._app = app # Store reference for clarify_callback # โ”€โ”€ Fix ghost status-bar lines on terminal resize โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - # When the terminal shrinks (e.g. un-maximize), the emulator reflows - # the previously-rendered full-width rows (status bar, input rules) - # into multiple narrower rows. prompt_toolkit's _on_resize handler - # only cursor_up()s by the stored layout height, missing the extra - # rows created by reflow โ€” leaving ghost duplicates visible. + # Resize handling: monkey-patch prompt_toolkit's _output_screen_diff + # to suppress the deliberate "reserve vertical space" scroll-up. # - # It's not just column-shrink: widening, row-shrinking, and - # multiplexer-driven SIGWINCH-less redraws (cmux / tmux tab switch) - # all produce the same class of drift, where the renderer's tracked - # _cursor_pos.y no longer matches terminal reality. The only reliable - # recovery is a full screen-clear (\x1b[2J\x1b[H) before the next - # redraw, so we force one on every resize rather than trying to - # compute the exact drift. 
+ # Background: prompt_toolkit's renderer (renderer.py L232-242) + # explicitly moves the cursor to the bottom of the canvas after + # painting "to make sure the terminal scrolls up, even when the + # lower lines of the canvas just contain whitespace". In + # non-fullscreen mode this scrolls chrome content (status bar, + # input rules) into terminal scrollback on every render. When + # the terminal column-shrinks, the emulator reflows the previously + # rendered full-width rows into multiple narrower rows that get + # pushed up โ€” leaving ghost duplicates AND polluting scrollback. + # Same issue as pt #29 (open since 2014), #1675, #1933. + # + # Surgical fix: wrap _output_screen_diff so that when its internal + # `if current_height > previous_screen.height` branch fires (the + # one that does the bottom-cursor-move), we make it fall through + # by inflating previous_screen.height first. + try: + import prompt_toolkit.renderer as _pt_renderer + from prompt_toolkit.renderer import _output_screen_diff as _orig_osd + + if not getattr(_pt_renderer, "_hermes_osd_patched", False): + def _patched_output_screen_diff( + app, output, screen, current_pos, color_depth, + previous_screen, last_style, is_done, full_screen, + attrs_for_style_string, style_string_has_style, + size, previous_width, + ): + """Wraps pt's _output_screen_diff to suppress the + reserve-vertical-space scroll (renderer.py L232-242). + + Strategy: ONLY when previous_screen is non-None and + its current height is genuinely smaller than the new + screen's height, inflate it to match. This prevents + the bottom-cursor-move at L242 without changing any + other code path's behavior. + + Critical: do NOT replace a None previous_screen with + a fresh Screen() โ€” that would skip the proper + reset_attributes()+erase_down() at L178-185 which + fires when previous_screen is None (first-paint / + width-change). Without that reset, ANSI styles + leak between renders. 
+ """ + try: + if previous_screen is not None and hasattr(previous_screen, "height"): + if previous_screen.height < screen.height: + previous_screen.height = screen.height + except Exception: + pass + + return _orig_osd( + app, output, screen, current_pos, color_depth, + previous_screen, last_style, is_done, full_screen, + attrs_for_style_string, style_string_has_style, + size, previous_width, + ) + + _pt_renderer._output_screen_diff = _patched_output_screen_diff + _pt_renderer._hermes_osd_patched = True + except Exception: + pass + _original_on_resize = app._on_resize def _resize_clear_ghosts(): @@ -13110,16 +13482,8 @@ def process_loop(): # and watch pattern matches) while agent is idle. try: from tools.process_registry import process_registry - if not process_registry.completion_queue.empty(): - evt = process_registry.completion_queue.get_nowait() - # Skip if the agent already consumed this via wait/poll/log - _evt_sid = evt.get("session_id", "") - if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): - pass # already delivered via tool result - else: - _synth = _format_process_notification(evt) - if _synth: - self._pending_input.put(_synth) + for _evt, _synth in process_registry.drain_notifications(): + self._pending_input.put(_synth) except Exception: pass continue @@ -13227,15 +13591,8 @@ def _restart_recording(): # that arrived while the agent was running. 
try: from tools.process_registry import process_registry - while not process_registry.completion_queue.empty(): - evt = process_registry.completion_queue.get_nowait() - # Skip if the agent already consumed this via wait/poll/log - _evt_sid = evt.get("session_id", "") - if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): - continue # already delivered via tool result - _synth = _format_process_notification(evt) - if _synth: - self._pending_input.put(_synth) + for _evt, _synth in process_registry.drain_notifications(): + self._pending_input.put(_synth) except Exception: pass # Non-fatal โ€” don't break the main loop @@ -13367,6 +13724,30 @@ def _suppress_closed_loop_errors(loop, context): self._print_exit_summary() return + # On macOS with uv-managed Python, kqueue's selector cannot register + # fd 0, raising OSError(EINVAL) from kqueue.control() when prompt_toolkit + # calls loop.add_reader (#6393). Probe kqueue and, if it can't watch + # stdin, switch to a SelectSelector-backed event loop policy. + if sys.platform == "darwin": + try: + import selectors as _selectors + if hasattr(_selectors, "KqueueSelector"): + _kq = _selectors.KqueueSelector() + try: + _kq.register(0, _selectors.EVENT_READ) + _kq.unregister(0) + finally: + _kq.close() + except (OSError, ValueError, KeyError): + import asyncio as _aio_probe + import selectors as _selectors + + class _SelectEventLoopPolicy(_aio_probe.DefaultEventLoopPolicy): + def new_event_loop(self): + return _aio_probe.SelectorEventLoop(_selectors.SelectSelector()) + + _aio_probe.set_event_loop_policy(_SelectEventLoopPolicy()) + # Run the application with patch_stdout for proper output handling try: with patch_stdout(): @@ -13387,12 +13768,20 @@ def _suppress_closed_loop_errors(loop, context): except (KeyError, OSError) as _stdin_err: # Catch selector registration failures from broken stdin (#6393) # and I/O errors from broken stdout during interrupt (#13710). 
- if isinstance(_stdin_err, OSError) and getattr(_stdin_err, "errno", None) == errno.EIO: + _errno = getattr(_stdin_err, "errno", None) if isinstance(_stdin_err, OSError) else None + _msg = str(_stdin_err) + if _errno == errno.EIO: pass # suppress broken-stdout I/O errors on interrupt (#13710) - elif "is not registered" in str(_stdin_err) or "Bad file descriptor" in str(_stdin_err): + elif ( + _errno in (errno.EINVAL, errno.EBADF) + or "is not registered" in _msg + or "Bad file descriptor" in _msg + or "Invalid argument" in _msg + ): print( f"\nError: stdin is not usable ({_stdin_err}).\n" - "This can happen with certain Python installations (e.g. uv-managed cPython on macOS).\n" + "This can happen with certain Python installations (e.g. uv-managed cPython on macOS)\n" + "where kqueue cannot register fd 0.\n" "Try reinstalling Python via pyenv or Homebrew, then re-run: hermes setup" ) else: diff --git a/cron/jobs.py b/cron/jobs.py index 6b3bc0e66f90..c5da32d44d50 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -645,6 +645,44 @@ def get_job(job_id: str) -> Optional[Dict[str, Any]]: return None +class AmbiguousJobReference(LookupError): + """Raised when a job name matches more than one job.""" + + def __init__(self, ref: str, matches: List[Dict[str, Any]]): + self.ref = ref + self.matches = matches + ids = ", ".join(m["id"] for m in matches) + super().__init__( + f"Job name '{ref}' is ambiguous โ€” matches {len(matches)} jobs: {ids}. " + f"Use the job ID instead." + ) + + +def resolve_job_ref(ref: str) -> Optional[Dict[str, Any]]: + """Resolve a job reference (ID or name) to a job record. + + - Exact ID match wins (works even if a different job's name equals this ID). + - Otherwise, case-insensitive name match. + - If a name matches more than one job, raises AmbiguousJobReference so the + caller can surface the matching IDs rather than silently picking one. 
+ """ + if not ref: + return None + jobs = load_jobs() + for job in jobs: + if job["id"] == ref: + return _normalize_job_record(job) + ref_lower = ref.lower() + name_matches = [j for j in jobs if (j.get("name") or "").lower() == ref_lower] + if not name_matches: + return None + if len(name_matches) > 1: + raise AmbiguousJobReference( + ref, [_normalize_job_record(j) for j in name_matches] + ) + return _normalize_job_record(name_matches[0]) + + def list_jobs(include_disabled: bool = False) -> List[Dict[str, Any]]: """List all jobs, optionally including disabled ones.""" jobs = [_normalize_job_record(j) for j in load_jobs()] @@ -702,9 +740,12 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]] def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, Any]]: - """Pause a job without deleting it.""" + """Pause a job without deleting it. Accepts a job ID or name.""" + job = resolve_job_ref(job_id) + if not job: + return None return update_job( - job_id, + job["id"], { "enabled": False, "state": "paused", @@ -715,14 +756,14 @@ def pause_job(job_id: str, reason: Optional[str] = None) -> Optional[Dict[str, A def resume_job(job_id: str) -> Optional[Dict[str, Any]]: - """Resume a paused job and compute the next future run from now.""" - job = get_job(job_id) + """Resume a paused job and compute the next future run from now. Accepts a job ID or name.""" + job = resolve_job_ref(job_id) if not job: return None next_run_at = compute_next_run(job["schedule"]) return update_job( - job_id, + job["id"], { "enabled": True, "state": "scheduled", @@ -734,12 +775,12 @@ def resume_job(job_id: str) -> Optional[Dict[str, Any]]: def trigger_job(job_id: str) -> Optional[Dict[str, Any]]: - """Schedule a job to run on the next scheduler tick.""" - job = get_job(job_id) + """Schedule a job to run on the next scheduler tick. 
Accepts a job ID or name.""" + job = resolve_job_ref(job_id) if not job: return None return update_job( - job_id, + job["id"], { "enabled": True, "state": "scheduled", @@ -751,14 +792,18 @@ def trigger_job(job_id: str) -> Optional[Dict[str, Any]]: def remove_job(job_id: str) -> bool: - """Remove a job by ID.""" + """Remove a job by ID or name.""" + job = resolve_job_ref(job_id) + if not job: + return False + canonical_id = job["id"] jobs = load_jobs() original_len = len(jobs) - jobs = [j for j in jobs if j["id"] != job_id] + jobs = [j for j in jobs if j["id"] != canonical_id] if len(jobs) < original_len: save_jobs(jobs) # Clean up output directory to prevent orphaned dirs accumulating - job_output_dir = OUTPUT_DIR / job_id + job_output_dir = OUTPUT_DIR / canonical_id if job_output_dir.exists(): shutil.rmtree(job_output_dir) return True diff --git a/cron/scheduler.py b/cron/scheduler.py index b585ef2e42ba..51a81a4721f4 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -92,6 +92,7 @@ def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None: "matrix", "mattermost", "homeassistant", "dingtalk", "feishu", "wecom", "wecom_callback", "weixin", "sms", "email", "webhook", "bluebubbles", "qqbot", "yuanbao", + "tlon", }) # Platforms that support a configured cron/notification home target, mapped to @@ -112,6 +113,7 @@ def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None: "bluebubbles": "BLUEBUBBLES_HOME_CHANNEL", "qqbot": "QQBOT_HOME_CHANNEL", "whatsapp": "WHATSAPP_HOME_CHANNEL", + "tlon": "TLON_HOME_CHANNEL", } # Legacy env var names kept for back-compat. 
Each entry is the current diff --git a/environments/README.md b/environments/README.md deleted file mode 100644 index 3936e1f35bc6..000000000000 --- a/environments/README.md +++ /dev/null @@ -1,324 +0,0 @@ -# Hermes-Agent Atropos Environments - -This directory contains the integration layer between **hermes-agent's** tool-calling capabilities and the **Atropos** RL training framework. It provides everything needed to run agentic LLMs through multi-turn tool-calling loops, score their output with arbitrary reward functions, and feed results into Atropos for training or evaluation. - -## Architecture Overview - -``` - Atropos Framework - โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” - โ”‚ BaseEnv โ”‚ (atroposlib) - โ”‚ - Server management โ”‚ - โ”‚ - Worker scheduling โ”‚ - โ”‚ - Wandb logging โ”‚ - โ”‚ - CLI (serve/process/ โ”‚ - โ”‚ evaluate) โ”‚ - โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ inherits - โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ดโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” - โ”‚ HermesAgentBaseEnv โ”‚ hermes_base_env.py - โ”‚ - Terminal backend โ”‚ - โ”‚ - Tool resolution โ”‚ - โ”‚ - Agent loop โ”‚ - โ”‚ - ToolContext โ”‚ - โ”‚ - Async patches โ”‚ - โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ - โ”‚ inherits - โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” - โ”‚ โ”‚ โ”‚ - TerminalTestEnv HermesSweEnv TerminalBench2EvalEnv - (stack testing) (SWE training) (TB2 benchmark eval) -``` - -### Inheritance Chain - -**BaseEnv** (from `atroposlib`) is the Atropos base class. 
It provides: -- Server management (OpenAI-compatible API servers, VLLM, SGLang) -- Worker scheduling for parallel rollouts -- Wandb integration for metrics and rollout logging -- CLI interface with three subcommands: `serve`, `process`, `evaluate` -- `evaluate_log()` for saving eval results to JSON + samples.jsonl - -**HermesAgentBaseEnv** (`hermes_base_env.py`) extends BaseEnv with hermes-agent specifics: -- Sets `os.environ["TERMINAL_ENV"]` to configure the terminal backend (local, docker, ssh, singularity, modal, daytona, vercel_sandbox) -- Resolves hermes-agent toolsets via `_resolve_tools_for_group()` (calls `get_tool_definitions()` which queries `tools/registry.py`) -- Implements `collect_trajectory()` which runs the full agent loop and computes rewards -- Supports two-phase operation (Phase 1: OpenAI server, Phase 2: VLLM ManagedServer) -- Applies monkey patches for async-safe tool operation at import time - -Concrete environments inherit from `HermesAgentBaseEnv` and implement: -- `setup()` -- Load dataset, initialize state -- `get_next_item()` -- Return the next item for rollout -- `format_prompt()` -- Convert a dataset item into the user message -- `compute_reward()` -- Score the rollout using ToolContext -- `evaluate()` -- Periodic evaluation logic - -## Core Components - -### Agent Loop (`agent_loop.py`) - -`HermesAgentLoop` is the reusable multi-turn agent engine. It runs the same pattern as hermes-agent's `run_agent.py`: - -1. Send messages + tools to the API via `server.chat_completion()` -2. If the response contains `tool_calls`, execute each one via `handle_function_call()` (which delegates to `tools/registry.py`'s `dispatch()`) -3. Append tool results to the conversation and go back to step 1 -4. If the response has no tool_calls, the agent is done - -Tool calls are executed in a thread pool (`run_in_executor`) so backends that use `asyncio.run()` internally (Modal, Docker) don't deadlock inside Atropos's event loop. 
- -Returns an `AgentResult` containing the full conversation history, turn count, reasoning content per turn, tool errors, and optional ManagedServer state (for Phase 2). - -### Tool Context (`tool_context.py`) - -`ToolContext` is a per-rollout handle that gives reward/verification functions direct access to **all** hermes-agent tools, scoped to the rollout's `task_id`. The same `task_id` means the terminal/browser session is the SAME one the model used during its rollout -- all state (files, processes, browser tabs) is preserved. - -```python -async def compute_reward(self, item, result, ctx: ToolContext): - # Run tests in the model's terminal sandbox - test = ctx.terminal("pytest -v") - if test["exit_code"] == 0: - return 1.0 - - # Check if a file was created - content = ctx.read_file("/workspace/solution.py") - if content.get("content"): - return 0.5 - - # Download files locally for verification (binary-safe) - ctx.download_file("/remote/output.bin", "/local/output.bin") - - return 0.0 -``` - -Available methods: -- **Terminal**: `terminal(command, timeout)` -- run shell commands -- **Files**: `read_file(path)`, `write_file(path, content)`, `search(query, path)` -- **Transfers**: `upload_file()`, `upload_dir()`, `download_file()`, `download_dir()` -- binary-safe file transfers between host and sandbox -- **Web**: `web_search(query)`, `web_extract(urls)` -- **Browser**: `browser_navigate(url)`, `browser_snapshot()` -- **Generic**: `call_tool(name, args)` -- call any hermes-agent tool by name -- **Cleanup**: `cleanup()` -- release all resources (called automatically after `compute_reward`) - -### Patches (`patches.py`) - -**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., the Modal backend). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested. - -**Solution**: `ModalEnvironment` uses a dedicated `_AsyncWorker` background thread with its own event loop. 
The calling code sees a sync interface, but internally all async Modal SDK calls happen on the worker thread so they don't conflict with Atropos's loop. This is built directly into `tools/environments/modal.py` — no monkey-patching required. - -`patches.py` is now a no-op (kept for backward compatibility with imports). - -### Tool Call Parsers (`tool_call_parsers/`) - -Client-side parsers that extract structured `tool_calls` from raw model output text. Used in **Phase 2** (VLLM server type) where ManagedServer's `/generate` endpoint returns raw text without tool call parsing. - -Each parser is a standalone reimplementation of the corresponding VLLM parser's `extract_tool_calls()` logic. No VLLM dependency -- only standard library (`re`, `json`, `uuid`) and `openai` types. - -Available parsers: -- `hermes` -- Hermes/ChatML `` XML format -- `mistral` -- Mistral `[TOOL_CALLS]` format -- `llama3_json` -- Llama 3 JSON tool calling -- `qwen` -- Qwen tool calling format -- `qwen3_coder` -- Qwen3 Coder format -- `deepseek_v3` -- DeepSeek V3 format -- `deepseek_v3_1` -- DeepSeek V3.1 format -- `kimi_k2` -- Kimi K2 format -- `longcat` -- Longcat format -- `glm45` / `glm47` -- GLM model formats - -Usage: -```python -from environments.tool_call_parsers import get_parser - -parser = get_parser("hermes") -content, tool_calls = parser.parse(raw_model_output) -``` - -In Phase 1 (OpenAI server type), these parsers are not needed -- the server handles tool call parsing natively. - -## Two-Phase Operation - -### Phase 1: OpenAI Server (Evaluation / SFT Data Generation) - -Uses `server.chat_completion()` with `tools=` parameter. The server (VLLM, SGLang, OpenRouter, OpenAI) handles tool call parsing natively. Returns `ChatCompletion` objects with structured `tool_calls`.
- -- Good for: evaluation, SFT data generation, testing -- Run with: `serve` (with `run-api`), `process`, or `evaluate` subcommands -- Placeholder tokens are created for the Atropos pipeline - -### Phase 2: VLLM ManagedServer (Full RL Training) - -Uses ManagedServer for exact token IDs + logprobs via `/generate`. Client-side tool call parser (from `tool_call_parsers/`) reconstructs structured `tool_calls` from raw output. - -- Good for: full RL training with GRPO/PPO -- Run with: `serve` subcommand -- Real tokens, masks, and logprobs flow through the pipeline - -## Directory Structure - -``` -environments/ -├── README.md # This file -├── __init__.py # Package exports -├── hermes_base_env.py # Abstract base (HermesAgentBaseEnv) -├── agent_loop.py # Multi-turn agent engine (HermesAgentLoop) -├── tool_context.py # Per-rollout tool access for reward functions -├── patches.py # Async-safety patches for Modal backend -│ -├── tool_call_parsers/ # Phase 2 client-side parsers -│ ├── __init__.py # Registry + base class -│ ├── hermes_parser.py -│ ├── mistral_parser.py -│ ├── llama_parser.py -│ ├── qwen_parser.py -│ ├── qwen3_coder_parser.py -│ ├── deepseek_v3_parser.py -│ ├── deepseek_v3_1_parser.py -│ ├── kimi_k2_parser.py -│ ├── longcat_parser.py -│ ├── glm45_parser.py -│ └── glm47_parser.py -│ -├── terminal_test_env/ # Stack validation environment -│ └── terminal_test_env.py -│ -├── hermes_swe_env/ # SWE-bench style training environment -│ └── hermes_swe_env.py -│ -└── benchmarks/ # Evaluation benchmarks - ├── terminalbench_2/ # 89 terminal tasks, Modal sandboxes - │ └── terminalbench2_env.py - ├── tblite/ # 100 calibrated tasks (fast TB2 proxy) - │ └── tblite_env.py - └── yc_bench/ # Long-horizon strategic benchmark - └── yc_bench_env.py -``` - -## 
Concrete Environments - -### TerminalTestEnv (`terminal_test_env/`) - -A self-contained environment with inline tasks (no external dataset needed) for validating the full stack end-to-end. Each task asks the model to create a file at a known path, and the verifier checks the content matches. - -```bash -# Serve mode (needs run-api) -run-api -python environments/terminal_test_env/terminal_test_env.py serve - -# Process mode (no run-api, saves to JSONL) -python environments/terminal_test_env/terminal_test_env.py process \ - --env.data_path_to_save_groups terminal_test_output.jsonl -``` - -### HermesSweEnv (`hermes_swe_env/`) - -SWE-bench style training environment. The model gets a coding task, uses terminal + file + web tools to solve it, and the reward function runs tests in the same Modal sandbox. - -```bash -python environments/hermes_swe_env/hermes_swe_env.py serve \ - --openai.model_name YourModel \ - --env.dataset_name bigcode/humanevalpack \ - --env.terminal_backend modal -``` - -### TerminalBench2EvalEnv (`benchmarks/terminalbench_2/`) - -**Eval-only** environment for the Terminal-Bench 2.0 benchmark (89 tasks). Each task gets a pre-built Docker Hub image, a natural language instruction, and a test suite. The agent uses terminal + file tools to solve the task, then the test suite verifies correctness. 
- -Follows the standard Atropos eval pattern (like GPQA, MMLU, etc.): -- Run via `evaluate` subcommand (no `run-api` needed) -- `setup()` loads the dataset, `evaluate()` runs all tasks -- `rollout_and_score_eval()` handles per-task agent loop + test verification -- Downloads verifier output locally for reliable reward checking (Harbor pattern) - -```bash -# Run full benchmark -python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ - --openai.model_name anthropic/claude-opus-4.6 - -# Run subset of tasks -python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ - --openai.model_name anthropic/claude-opus-4.6 \ - --env.task_filter fix-git,git-multibranch - -# Skip specific tasks -python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ - --openai.model_name anthropic/claude-opus-4.6 \ - --env.skip_tasks heavy-task,slow-task -``` - -## Creating a New Environment - -### Training Environment - -1. Create a new directory under `environments/` -2. Create your env file inheriting from `HermesAgentBaseEnv` -3. Implement the four abstract methods + `evaluate()` - -```python -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig - -class MyEnvConfig(HermesAgentEnvConfig): - pass # Add custom fields as needed - -class MyEnv(HermesAgentBaseEnv): - name = "my-env" - env_config_cls = MyEnvConfig - - @classmethod - def config_init(cls): - env_config = MyEnvConfig( - enabled_toolsets=["terminal", "file"], - terminal_backend="modal", - # ... other config - ) - server_configs = [APIServerConfig(...)] - return env_config, server_configs - - async def setup(self): - self.dataset = load_dataset(...) 
- self.iter = 0 - - async def get_next_item(self): - item = self.dataset[self.iter % len(self.dataset)] - self.iter += 1 - return item - - def format_prompt(self, item): - return item["instruction"] - - async def compute_reward(self, item, result, ctx): - # ctx gives you full tool access to the rollout's sandbox - test = ctx.terminal("pytest -v") - return 1.0 if test["exit_code"] == 0 else 0.0 - - async def evaluate(self, *args, **kwargs): - # Periodic evaluation logic - ... - -if __name__ == "__main__": - MyEnv.cli() -``` - -### Eval-Only Environment (Benchmark) - -For eval benchmarks, follow the pattern in `terminalbench2_env.py`: -1. Create under `environments/benchmarks/your-benchmark/` -2. Inherit from `HermesAgentBaseEnv` -3. Set eval-only config: `eval_handling=STOP_TRAIN`, `steps_per_eval=1`, `total_steps=1` -4. Stub the training methods (`collect_trajectories`, `score`) -5. Implement `rollout_and_score_eval()` and `evaluate()` -6. Run with `evaluate` subcommand - -## Key Config Fields - -| Field | Description | Default | -|-------|-------------|---------| -| `enabled_toolsets` | Which hermes toolsets to enable | `None` (all) | -| `disabled_toolsets` | Toolsets to disable | `None` | -| `distribution` | Probabilistic toolset distribution name | `None` | -| `max_agent_turns` | Max LLM calls per rollout | `30` | -| `agent_temperature` | Sampling temperature | `1.0` | -| `terminal_backend` | `local`, `docker`, `modal`, `daytona`, `ssh`, `singularity` | `local` | -| `system_prompt` | System message for the agent | `None` | -| `tool_call_parser` | Parser name for Phase 2 | `hermes` | -| `eval_handling` | `STOP_TRAIN`, `LIMIT_TRAIN`, `NONE` | `STOP_TRAIN` | diff --git a/environments/__init__.py b/environments/__init__.py deleted file mode 100644 index 282bc06b0b35..000000000000 --- a/environments/__init__.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -Hermes-Agent Atropos Environments - -Provides a layered integration between hermes-agent's tool-calling capabilities -and 
the Atropos RL training framework. - -Core layers: - - agent_loop: Reusable multi-turn agent loop with standard OpenAI-spec tool calling - - tool_context: Per-rollout tool access handle for reward/verification functions - - hermes_base_env: Abstract base environment (BaseEnv subclass) for Atropos - - tool_call_parsers: Client-side tool call parser registry for Phase 2 (VLLM /generate) - -Concrete environments: - - terminal_test_env/: Simple file-creation tasks for testing the stack - - hermes_swe_env/: SWE-bench style tasks with Modal sandboxes - -Benchmarks (eval-only): - - benchmarks/terminalbench_2/: Terminal-Bench 2.0 evaluation -""" - -try: - from environments.agent_loop import AgentResult, HermesAgentLoop - from environments.tool_context import ToolContext - from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -except ImportError: - # atroposlib not installed — environments are unavailable but - # submodules like tool_call_parsers can still be imported directly. - pass - -__all__ = [ - "AgentResult", - "HermesAgentLoop", - "ToolContext", - "HermesAgentBaseEnv", - "HermesAgentEnvConfig", -] diff --git a/environments/agent_loop.py b/environments/agent_loop.py deleted file mode 100644 index 7ca3a0f6ddbf..000000000000 --- a/environments/agent_loop.py +++ /dev/null @@ -1,534 +0,0 @@ -""" -HermesAgentLoop -- Reusable Multi-Turn Agent Engine - -Runs the hermes-agent tool-calling loop using standard OpenAI-spec tool calling. -Works with any server that returns ChatCompletion objects with tool_calls: - - Phase 1: OpenAI server type (VLLM, SGLang, OpenRouter, OpenAI API) - - Phase 2: ManagedServer with client-side tool call parser - -The loop passes tools= and checks response.choices[0].message.tool_calls, -identical to hermes-agent's run_agent.py. Tool execution is dispatched via -handle_function_call() from model_tools.py.
-""" - -import asyncio -import concurrent.futures -import json -import logging -import os -import uuid -from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Set - -from model_tools import handle_function_call -from tools.terminal_tool import get_active_env -from tools.tool_result_storage import maybe_persist_tool_result, enforce_turn_budget - -# Thread pool for running sync tool calls that internally use asyncio.run() -# (e.g., the Modal/Docker/Daytona terminal backends). Running them in a separate -# thread gives them a clean event loop so they don't deadlock inside Atropos's loop. -# Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all -# making tool calls). Too small = thread pool starvation, tasks queue for minutes. -# Resized at runtime by HermesAgentBaseEnv.__init__ via resize_tool_pool(). -_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=128) - - -def resize_tool_pool(max_workers: int): - """ - Replace the global tool executor with a new one of the given size. - - Called by HermesAgentBaseEnv.__init__ based on config.tool_pool_size. - Safe to call before any tasks are submitted. 
- """ - global _tool_executor - old_executor = _tool_executor - _tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) - old_executor.shutdown(wait=False) - logger.info("Tool thread pool resized to %d workers", max_workers) - -logger = logging.getLogger(__name__) - - -@dataclass -class ToolError: - """Record of a tool execution error during the agent loop.""" - - turn: int # Which turn the error occurred on - tool_name: str # Which tool was called - arguments: str # The arguments passed (truncated) - error: str # The error message - tool_result: str # The raw result returned to the model - - -@dataclass -class AgentResult: - """Result of running the agent loop.""" - - # Full conversation history in OpenAI message format - messages: List[Dict[str, Any]] - # ManagedServer.get_state() if available (Phase 2), None otherwise - managed_state: Optional[Dict[str, Any]] = None - # How many LLM calls were made - turns_used: int = 0 - # True if model stopped calling tools naturally (vs hitting max_turns) - finished_naturally: bool = False - # Extracted reasoning content per turn (from PR #297 helpers) - reasoning_per_turn: List[Optional[str]] = field(default_factory=list) - # Tool errors encountered during the loop - tool_errors: List[ToolError] = field(default_factory=list) - - -def _extract_reasoning_from_message(message) -> Optional[str]: - """ - Extract reasoning content from a ChatCompletion message. - - Handles multiple provider formats: - 1. message.reasoning_content field (some providers) - 2. message.reasoning field (some providers) - 3. message.reasoning_details[].text (OpenRouter style) - - Note: block extraction from content is NOT done here -- that's - handled by the response already in Phase 1 (server does it) or by - ManagedServer's patch in Phase 2. 
- - Args: - message: The assistant message from ChatCompletion response - - Returns: - Extracted reasoning text, or None if not found - """ - # Check reasoning_content field (common across providers) - if hasattr(message, "reasoning_content") and message.reasoning_content: - return message.reasoning_content - - # Check reasoning field - if hasattr(message, "reasoning") and message.reasoning: - return message.reasoning - - # Check reasoning_details (OpenRouter style) - if hasattr(message, "reasoning_details") and message.reasoning_details: - for detail in message.reasoning_details: - if hasattr(detail, "text") and detail.text: - return detail.text - if isinstance(detail, dict) and detail.get("text"): - return detail["text"] - - return None - - -class HermesAgentLoop: - """ - Runs hermes-agent's tool-calling loop using standard OpenAI-spec tool calling. - - Same pattern as run_agent.py: - - Pass tools= to the API - - Check response.choices[0].message.tool_calls - - Dispatch via handle_function_call() - - Works identically with any server type -- OpenAI, VLLM, SGLang, OpenRouter, - or ManagedServer with a parser. The server determines how tool_calls get - populated on the response. - """ - - def __init__( - self, - server, - tool_schemas: List[Dict[str, Any]], - valid_tool_names: Set[str], - max_turns: int = 30, - task_id: Optional[str] = None, - temperature: float = 1.0, - max_tokens: Optional[int] = None, - extra_body: Optional[Dict[str, Any]] = None, - budget_config: Optional["BudgetConfig"] = None, - ): - """ - Initialize the agent loop. - - Args: - server: Server object with chat_completion() method (OpenAIServer, - ManagedServer, ServerManager, etc.) 
- tool_schemas: OpenAI-format tool definitions from get_tool_definitions() - valid_tool_names: Set of tool names the model is allowed to call - max_turns: Maximum number of LLM calls before stopping - task_id: Unique ID for terminal/browser session isolation - temperature: Sampling temperature for generation - max_tokens: Max tokens per generation (None for server default) - extra_body: Extra parameters passed to the OpenAI client's create() call. - Used for OpenRouter provider preferences, transforms, etc. - e.g. {"provider": {"ignore": ["DeepInfra"]}} - budget_config: Tool result persistence budget. Controls per-tool - thresholds, per-turn aggregate budget, and preview size. - If None, uses DEFAULT_BUDGET (current hardcoded values). - """ - from tools.budget_config import DEFAULT_BUDGET - self.server = server - self.tool_schemas = tool_schemas - self.valid_tool_names = valid_tool_names - self.max_turns = max_turns - self.task_id = task_id or str(uuid.uuid4()) - self.temperature = temperature - self.max_tokens = max_tokens - self.extra_body = extra_body - self.budget_config = budget_config or DEFAULT_BUDGET - - async def run(self, messages: List[Dict[str, Any]]) -> AgentResult: - """ - Execute the full agent loop using standard OpenAI tool calling. - - Args: - messages: Initial conversation messages (system + user). - Modified in-place as the conversation progresses. 
- - Returns: - AgentResult with full conversation history, managed state, and metadata - """ - reasoning_per_turn = [] - tool_errors: List[ToolError] = [] - - # Per-loop TodoStore for the todo tool (ephemeral, dies with the loop) - from tools.todo_tool import TodoStore, todo_tool as _todo_tool - _todo_store = TodoStore() - - # Extract user task from first user message for browser_snapshot context - _user_task = None - for msg in messages: - if msg.get("role") == "user": - content = msg.get("content", "") - if isinstance(content, str) and content.strip(): - _user_task = content.strip()[:500] # Cap to avoid huge strings - break - - import time as _time - - for turn in range(self.max_turns): - turn_start = _time.monotonic() - - # Build the chat_completion kwargs - chat_kwargs = { - "messages": messages, - "n": 1, - "temperature": self.temperature, - } - - # Only pass tools if we have them - if self.tool_schemas: - chat_kwargs["tools"] = self.tool_schemas - - # Only pass max_tokens if explicitly set - if self.max_tokens is not None: - chat_kwargs["max_tokens"] = self.max_tokens - - # Inject extra_body for provider-specific params (e.g., OpenRouter - # provider preferences like banned/preferred providers, transforms) - if self.extra_body: - chat_kwargs["extra_body"] = self.extra_body - - # Make the API call -- standard OpenAI spec - api_start = _time.monotonic() - try: - response = await self.server.chat_completion(**chat_kwargs) - except Exception as e: - api_elapsed = _time.monotonic() - api_start - logger.error("API call failed on turn %d (%.1fs): %s", turn + 1, api_elapsed, e) - return AgentResult( - messages=messages, - managed_state=self._get_managed_state(), - turns_used=turn + 1, - finished_naturally=False, - reasoning_per_turn=reasoning_per_turn, - tool_errors=tool_errors, - ) - - api_elapsed = _time.monotonic() - api_start - - if not response or not response.choices: - logger.warning("Empty response on turn %d (api=%.1fs)", turn + 1, api_elapsed) - return 
AgentResult( - messages=messages, - managed_state=self._get_managed_state(), - turns_used=turn + 1, - finished_naturally=False, - reasoning_per_turn=reasoning_per_turn, - tool_errors=tool_errors, - ) - - assistant_msg = response.choices[0].message - - # Extract reasoning content from the response (all provider formats) - reasoning = _extract_reasoning_from_message(assistant_msg) - reasoning_per_turn.append(reasoning) - - # Check for tool calls -- standard OpenAI spec. - # Fallback: if response has no structured tool_calls but content - # contains raw tool call tags (e.g. ), parse them using - # hermes-agent's standalone parsers. This handles the case where - # ManagedServer's ToolCallTranslator couldn't parse because vLLM - # isn't installed. - if ( - not assistant_msg.tool_calls - and assistant_msg.content - and self.tool_schemas - and "" in (assistant_msg.content or "") - ): - try: - from environments.tool_call_parsers import get_parser - fallback_parser = get_parser("hermes") - parsed_content, parsed_calls = fallback_parser.parse( - assistant_msg.content - ) - if parsed_calls: - assistant_msg.tool_calls = parsed_calls - if parsed_content is not None: - assistant_msg.content = parsed_content - logger.debug( - "Fallback parser extracted %d tool calls from raw content", - len(parsed_calls), - ) - except Exception: - pass # Fall through to no tool calls - - if assistant_msg.tool_calls: - # Normalize tool calls to dicts — they may come as objects - # (OpenAI API) or dicts (vLLM ToolCallTranslator).
- def _tc_to_dict(tc): - if isinstance(tc, dict): - return { - "id": tc.get("id", f"call_{uuid.uuid4().hex[:8]}"), - "type": "function", - "function": { - "name": tc.get("function", {}).get("name", tc.get("name", "")), - "arguments": tc.get("function", {}).get("arguments", tc.get("arguments", "{}")), - }, - } - return { - "id": tc.id, - "type": "function", - "function": { - "name": tc.function.name, - "arguments": tc.function.arguments, - }, - } - - # Build the assistant message dict for conversation history - msg_dict: Dict[str, Any] = { - "role": "assistant", - "content": assistant_msg.content or "", - "tool_calls": [_tc_to_dict(tc) for tc in assistant_msg.tool_calls], - } - - # Preserve reasoning_content for multi-turn chat template handling - # (e.g., Kimi-K2's template renders blocks differently - # for history vs. the latest turn based on this field) - if reasoning: - msg_dict["reasoning_content"] = reasoning - - messages.append(msg_dict) - - # Execute each tool call via hermes-agent's dispatch - for tc in assistant_msg.tool_calls: - # Handle both object (OpenAI) and dict (vLLM) formats - if isinstance(tc, dict): - tool_name = tc.get("function", {}).get("name", tc.get("name", "")) - tool_args_raw = tc.get("function", {}).get("arguments", tc.get("arguments", "{}")) - else: - tool_name = tc.function.name - tool_args_raw = tc.function.arguments - - # Validate tool name - if tool_name not in self.valid_tool_names: - tool_result = json.dumps( - { - "error": f"Unknown tool '{tool_name}'. 
" - f"Available tools: {sorted(self.valid_tool_names)}" - } - ) - tool_errors.append(ToolError( - turn=turn + 1, tool_name=tool_name, - arguments=tool_args_raw[:200], - error=f"Unknown tool '{tool_name}'", - tool_result=tool_result, - )) - logger.warning( - "Model called unknown tool '%s' on turn %d", - tool_name, turn + 1, - ) - else: - # Parse arguments - try: - args = json.loads(tool_args_raw) - except json.JSONDecodeError as e: - args = None - tool_result = json.dumps( - {"error": f"Invalid JSON in tool arguments: {e}. Please retry with valid JSON."} - ) - tool_errors.append(ToolError( - turn=turn + 1, tool_name=tool_name, - arguments=tool_args_raw[:200], - error=f"Invalid JSON: {e}", - tool_result=tool_result, - )) - logger.warning( - "Invalid JSON in tool call arguments for '%s': %s", - tool_name, tool_args_raw[:200], - ) - - # Dispatch tool only if arguments parsed successfully - if args is not None: - try: - if tool_name == "terminal": - backend = os.getenv("TERMINAL_ENV", "local") - cmd_preview = args.get("command", "")[:80] - logger.info( - "[%s] $ %s", self.task_id[:8], cmd_preview, - ) - - tool_submit_time = _time.monotonic() - - # Todo tool -- handle locally (needs per-loop TodoStore) - if tool_name == "todo": - tool_result = _todo_tool( - todos=args.get("todos"), - merge=args.get("merge", False), - store=_todo_store, - ) - tool_elapsed = _time.monotonic() - tool_submit_time - elif tool_name == "memory": - tool_result = json.dumps({"error": "Memory is not available in RL environments."}) - tool_elapsed = _time.monotonic() - tool_submit_time - elif tool_name == "session_search": - tool_result = json.dumps({"error": "Session search is not available in RL environments."}) - tool_elapsed = _time.monotonic() - tool_submit_time - else: - # Run tool calls in a thread pool so backends that - # use asyncio.run() internally (modal, docker, daytona) get - # a clean event loop instead of deadlocking. 
- loop = asyncio.get_running_loop() - # Capture current tool_name/args for the lambda - _tn, _ta, _tid = tool_name, args, self.task_id - tool_result = await loop.run_in_executor( - _tool_executor, - lambda: handle_function_call( - _tn, _ta, task_id=_tid, - user_task=_user_task, - ), - ) - tool_elapsed = _time.monotonic() - tool_submit_time - - # Log slow tools and thread pool stats for debugging - pool_active = _tool_executor._work_queue.qsize() - if tool_elapsed > 30: - logger.warning( - "[%s] turn %d: %s took %.1fs (pool queue=%d)", - self.task_id[:8], turn + 1, tool_name, - tool_elapsed, pool_active, - ) - except Exception as e: - tool_result = json.dumps( - {"error": f"Tool execution failed: {type(e).__name__}: {str(e)}"} - ) - tool_errors.append(ToolError( - turn=turn + 1, tool_name=tool_name, - arguments=tool_args_raw[:200], - error=f"{type(e).__name__}: {str(e)}", - tool_result=tool_result, - )) - logger.error( - "Tool '%s' execution failed on turn %d: %s", - tool_name, turn + 1, e, - ) - - # Also check if the tool returned an error in its JSON result - try: - result_data = json.loads(tool_result) - if isinstance(result_data, dict): - err = result_data.get("error") - exit_code = result_data.get("exit_code") - if err and exit_code and exit_code < 0: - tool_errors.append(ToolError( - turn=turn + 1, tool_name=tool_name, - arguments=tool_args_raw[:200], - error=str(err), - tool_result=tool_result[:500], - )) - except (json.JSONDecodeError, TypeError): - pass - - tc_id = tc.get("id", "") if isinstance(tc, dict) else tc.id - tool_result = maybe_persist_tool_result( - content=tool_result, - tool_name=tool_name, - tool_use_id=tc_id, - env=get_active_env(self.task_id), - config=self.budget_config, - ) - - messages.append( - { - "role": "tool", - "tool_call_id": tc_id, - "content": tool_result, - } - ) - - num_tcs = len(assistant_msg.tool_calls) - if num_tcs > 0: - enforce_turn_budget( - messages[-num_tcs:], - env=get_active_env(self.task_id), - 
config=self.budget_config, - ) - - turn_elapsed = _time.monotonic() - turn_start - logger.info( - "[%s] turn %d: api=%.1fs, %d tools, turn_total=%.1fs", - self.task_id[:8], turn + 1, api_elapsed, - len(assistant_msg.tool_calls), turn_elapsed, - ) - - else: - # No tool calls -- model is done - msg_dict = { - "role": "assistant", - "content": assistant_msg.content or "", - } - if reasoning: - msg_dict["reasoning_content"] = reasoning - messages.append(msg_dict) - - turn_elapsed = _time.monotonic() - turn_start - logger.info( - "[%s] turn %d: api=%.1fs, no tools (finished), turn_total=%.1fs", - self.task_id[:8], turn + 1, api_elapsed, turn_elapsed, - ) - - return AgentResult( - messages=messages, - managed_state=self._get_managed_state(), - turns_used=turn + 1, - finished_naturally=True, - reasoning_per_turn=reasoning_per_turn, - tool_errors=tool_errors, - ) - - # Hit max turns without the model stopping - logger.info("Agent hit max_turns (%d) without finishing", self.max_turns) - return AgentResult( - messages=messages, - managed_state=self._get_managed_state(), - turns_used=self.max_turns, - finished_naturally=False, - reasoning_per_turn=reasoning_per_turn, - tool_errors=tool_errors, - ) - - def _get_managed_state(self) -> Optional[Dict[str, Any]]: - """ - Get ManagedServer state if the server supports it. - - Returns state dict with SequenceNodes containing tokens/logprobs/masks, - or None if the server doesn't support get_state() (e.g., regular OpenAI server). 
- """ - if hasattr(self.server, "get_state"): - return self.server.get_state() - return None diff --git a/environments/agentic_opd_env.py b/environments/agentic_opd_env.py deleted file mode 100644 index c6ed88756bf2..000000000000 --- a/environments/agentic_opd_env.py +++ /dev/null @@ -1,1214 +0,0 @@ -""" -AgenticOPDEnv โ€” On-Policy Distillation for Agentic Tool-Calling Tasks -===================================================================== - -First Atropos environment to populate the distill_token_ids / distill_logprobs -fields on ScoredDataGroup, enabling on-policy distillation (OPD) training. - -Key idea (from OpenClaw-RL, Princeton 2026): - Every time an agent receives a next-state signal (tool result, error trace, - test verdict), that signal contains hindsight information about how the - agent's PREVIOUS response could have been better. This environment: - - 1. Runs standard agentic rollouts (tool-calling agent loop) - 2. Walks the conversation to find (assistant_turn, next_state) pairs - 3. Uses an LLM judge to extract "hints" from next-state signals - 4. Builds an enhanced prompt (original context + hint) - 5. Scores the student's response tokens under the enhanced distribution - using VLLM's prompt_logprobs (via Atropos's get_logprobs API) - 6. Packages the teacher's top-K predictions as distill_token_ids / - distill_logprobs on the ScoredDataGroup - -The trainer then computes per-token advantages: - A_t = teacher_logprob(token_t) - student_logprob(token_t) - Positive โ†’ teacher approves this token (upweight) - Negative โ†’ teacher disapproves (downweight) - -This gives dense, token-level training signal from every tool interaction, -instead of just a scalar reward at the end of the trajectory. - -Task: Coding tasks with test verification (rich next-state signals from -test results, error messages, terminal output). Falls back to built-in -coding problems if no HuggingFace dataset is configured. 
- -Requirements: - - VLLM backend (server_type: vllm) โ€” needed for prompt logprob scoring - - Phase 2 mode (ManagedServer) โ€” needed for token-level tracking - -Usage: - # Process mode (offline data generation with OPD) - python environments/agentic_opd_env.py process \\ - --env.total_steps 10 --env.group_size 2 \\ - --env.data_path_to_save_groups output.jsonl \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name Qwen/Qwen3-4B - - # Serve mode (connected to Atropos trainer) - python environments/agentic_opd_env.py serve \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name Qwen/Qwen3-4B - - # Evaluate mode - python environments/agentic_opd_env.py evaluate \\ - --env.eval_size 10 \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name Qwen/Qwen3-4B - -Reference: Wang et al., "OpenClaw-RL: Train Any Agent Simply by Talking" - arXiv:2603.10165, March 2026 -""" - -from __future__ import annotations - -import asyncio -import copy -import json -import logging -import os -import random -import re -import sys -import time -import uuid -from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union - -from pydantic import Field - -# Ensure hermes-agent root is on path -_repo_root = Path(__file__).resolve().parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from atroposlib.envs.base import ScoredDataGroup, ScoredDataItem -from atroposlib.envs.server_handling.server_manager import APIServerConfig -from atroposlib.type_definitions import Item - -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -from environments.agent_loop import AgentResult, HermesAgentLoop -from environments.tool_context import ToolContext - -logger = logging.getLogger(__name__) - - -# 
โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• -# Built-in coding tasks (fallback when no HF dataset is configured) -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - -BUILTIN_CODING_TASKS = [ - { - "task": "Write a Python function `fizzbuzz(n)` that returns a list of strings from 1 to n. " - "For multiples of 3 return 'Fizz', for multiples of 5 return 'Buzz', " - "for multiples of both return 'FizzBuzz', otherwise the number as a string.", - "test_code": ( - "from solution import fizzbuzz\n" - "assert fizzbuzz(15) == ['1','2','Fizz','4','Buzz','Fizz','7','8','Fizz','Buzz','11','Fizz','13','14','FizzBuzz']\n" - "assert fizzbuzz(1) == ['1']\n" - "assert fizzbuzz(0) == []\n" - "print('All tests passed!')\n" - ), - "difficulty": "easy", - }, - { - "task": "Write a Python function `is_palindrome(s)` that checks if a string is a palindrome, " - "ignoring case and non-alphanumeric characters. Return True or False.", - "test_code": ( - "from solution import is_palindrome\n" - "assert is_palindrome('A man, a plan, a canal: Panama') == True\n" - "assert is_palindrome('race a car') == False\n" - "assert is_palindrome('') == True\n" - "assert is_palindrome('Was it a car or a cat I saw?') == True\n" - "print('All tests passed!')\n" - ), - "difficulty": "easy", - }, - { - "task": "Write a Python function `two_sum(nums, target)` that returns the indices of the two " - "numbers in `nums` that add up to `target`. Assume exactly one solution exists. 
" - "Return a list of two indices [i, j] where i < j.", - "test_code": ( - "from solution import two_sum\n" - "assert two_sum([2, 7, 11, 15], 9) == [0, 1]\n" - "assert two_sum([3, 2, 4], 6) == [1, 2]\n" - "assert two_sum([3, 3], 6) == [0, 1]\n" - "print('All tests passed!')\n" - ), - "difficulty": "easy", - }, - { - "task": "Write a Python function `flatten(lst)` that takes an arbitrarily nested list and " - "returns a flat list of all elements. For example, flatten([1, [2, [3, 4], 5]]) " - "should return [1, 2, 3, 4, 5].", - "test_code": ( - "from solution import flatten\n" - "assert flatten([1, [2, [3, 4], 5]]) == [1, 2, 3, 4, 5]\n" - "assert flatten([]) == []\n" - "assert flatten([1, 2, 3]) == [1, 2, 3]\n" - "assert flatten([[[[1]]]]) == [1]\n" - "assert flatten([1, [2], [[3]], [[[4]]]]) == [1, 2, 3, 4]\n" - "print('All tests passed!')\n" - ), - "difficulty": "medium", - }, - { - "task": "Write a Python function `longest_common_prefix(strs)` that finds the longest " - "common prefix string amongst a list of strings. If there is no common prefix, " - "return an empty string.", - "test_code": ( - "from solution import longest_common_prefix\n" - "assert longest_common_prefix(['flower', 'flow', 'flight']) == 'fl'\n" - "assert longest_common_prefix(['dog', 'racecar', 'car']) == ''\n" - "assert longest_common_prefix(['interspecies', 'interstellar', 'interstate']) == 'inters'\n" - "assert longest_common_prefix(['a']) == 'a'\n" - "assert longest_common_prefix([]) == ''\n" - "print('All tests passed!')\n" - ), - "difficulty": "easy", - }, - { - "task": "Write a Python function `group_anagrams(strs)` that groups anagrams together. " - "Return a list of lists, where each inner list contains strings that are anagrams of " - "each other. 
The order of groups and strings within groups does not matter.", - "test_code": ( - "from solution import group_anagrams\n" - "result = group_anagrams(['eat', 'tea', 'tan', 'ate', 'nat', 'bat'])\n" - "result_sorted = sorted([sorted(g) for g in result])\n" - "assert result_sorted == [['ate', 'eat', 'tea'], ['bat'], ['nat', 'tan']]\n" - "assert group_anagrams([]) == []\n" - "assert group_anagrams(['a']) == [['a']]\n" - "print('All tests passed!')\n" - ), - "difficulty": "medium", - }, - { - "task": "Write a Python function `valid_parentheses(s)` that determines if a string " - "containing just '(', ')', '{', '}', '[' and ']' is valid. A string is valid if " - "open brackets are closed by the same type and in the correct order.", - "test_code": ( - "from solution import valid_parentheses\n" - "assert valid_parentheses('()') == True\n" - "assert valid_parentheses('()[]{}') == True\n" - "assert valid_parentheses('(]') == False\n" - "assert valid_parentheses('([)]') == False\n" - "assert valid_parentheses('{[]}') == True\n" - "assert valid_parentheses('') == True\n" - "print('All tests passed!')\n" - ), - "difficulty": "easy", - }, - { - "task": "Write a Python function `merge_intervals(intervals)` that merges overlapping " - "intervals. Each interval is a list [start, end]. 
Return the merged intervals sorted " - "by start time.", - "test_code": ( - "from solution import merge_intervals\n" - "assert merge_intervals([[1,3],[2,6],[8,10],[15,18]]) == [[1,6],[8,10],[15,18]]\n" - "assert merge_intervals([[1,4],[4,5]]) == [[1,5]]\n" - "assert merge_intervals([[1,4],[0,4]]) == [[0,4]]\n" - "assert merge_intervals([]) == []\n" - "assert merge_intervals([[1,2]]) == [[1,2]]\n" - "print('All tests passed!')\n" - ), - "difficulty": "medium", - }, -] - - -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• -# Hint extraction prompts (adapted from OpenClaw-RL) -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - -_HINT_JUDGE_SYSTEM = ( - "You are a process reward model used for hindsight hint extraction.\n" - "You are given:\n" - "1) The assistant response at turn t.\n" - "2) The next state at turn t+1, along with its **role**.\n\n" - "## Understanding the next state's role\n" - "- role='user': A reply from the user (follow-up, correction, new request, etc.).\n" - "- role='tool': The return value of a tool the assistant invoked. " - "This content was NOT available before the assistant's action โ€” " - "it exists BECAUSE the assistant called the tool. 
" - "A successful, non-error tool output generally means the assistant's " - "action was appropriate; do NOT treat it as information the assistant " - "should have already known.\n\n" - "Your goal is to decide whether the next state reveals useful hindsight information\n" - "that could have helped improve the assistant response at turn t.\n\n" - "Output format rules (strict):\n" - "- You MUST include exactly one final decision token: \\boxed{1} or \\boxed{-1}.\n" - "- If and only if decision is \\boxed{1}, provide a concise, information-dense hint in 1-3 sentences,\n" - " wrapped between [HINT_START] and [HINT_END].\n" - "- If decision is \\boxed{-1}, do not provide a hint block.\n" - "- Hint must be concrete and actionable for improving the previous response." -) - -_BOXED_RE = re.compile(r"\\boxed\{(-?\d+)\}") -_HINT_RE = re.compile(r"\[HINT_START\](.*?)\[HINT_END\]", re.DOTALL) - - -def _build_hint_judge_messages( - response_text: str, next_state_text: str, next_state_role: str = "tool" -) -> list[dict]: - """Build messages for the hint extraction judge.""" - user = ( - f"## Assistant response (turn t)\n{response_text}\n\n" - f"## Next state (turn t+1) [role: {next_state_role}]\n{next_state_text}\n\n" - "Now output your decision and (if positive) the hint in the required format." 
- ) - return [ - {"role": "system", "content": _HINT_JUDGE_SYSTEM}, - {"role": "user", "content": user}, - ] - - -def _parse_hint_result(text: str) -> tuple[int | None, str]: - """Parse the judge's boxed decision and hint text.""" - boxed = _BOXED_RE.findall(text) - score = int(boxed[-1]) if boxed else None - if score not in {1, -1}: - score = None - hint_matches = _HINT_RE.findall(text) - hint = hint_matches[-1].strip() if hint_matches else "" - return score, hint - - -def _select_best_hint(votes: list[dict]) -> dict | None: - """Select the best hint from majority-voted judge results.""" - good = [ - v - for v in votes - if v.get("score") == 1 - and isinstance(v.get("hint"), str) - and len(v["hint"].strip()) > 10 - ] - if not good: - return None - return max(good, key=lambda v: len(v["hint"].strip())) - - -def _append_hint_to_messages(messages: list[dict], hint: str) -> list[dict]: - """Clone messages and append hint to the last user message.""" - cloned = copy.deepcopy(messages) - if not cloned: - return [{"role": "user", "content": f"[user's hint / instruction]\n{hint}"}] - - # Find last user message - target_idx = None - for i in range(len(cloned) - 1, -1, -1): - if cloned[i].get("role") == "user": - target_idx = i - break - if target_idx is None: - target_idx = len(cloned) - 1 - - content = cloned[target_idx].get("content", "") - if isinstance(content, list): - content = " ".join( - c.get("text", "") if isinstance(c, dict) else str(c) for c in content - ) - suffix = f"\n\n[user's hint / instruction]\n{hint.strip()}" - cloned[target_idx]["content"] = (content + suffix).strip() - return cloned - - -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• -# Configuration -# 
โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - -class AgenticOPDConfig(HermesAgentEnvConfig): - """Configuration for the agentic OPD environment.""" - - # --- OPD settings --- - opd_enabled: bool = Field( - default=True, - description="Enable on-policy distillation pipeline. When disabled, " - "the environment behaves like a standard agentic env (no distill fields).", - ) - distill_topk: int = Field( - default=50, - description="Number of top-K teacher logprobs per position for distillation.", - ) - prm_votes: int = Field( - default=3, - description="Number of independent judge queries for majority-voted hint extraction.", - ) - hint_max_next_state_chars: int = Field( - default=4000, - description="Maximum characters of next-state text to include in the hint judge prompt. " - "Tool results can be very long โ€” truncating prevents judge context overflow.", - ) - - # --- Reward settings --- - correctness_weight: float = Field( - default=0.7, - description="Weight for test pass/fail in reward.", - ) - efficiency_weight: float = Field( - default=0.15, - description="Weight for efficiency (fewer turns = better).", - ) - tool_usage_weight: float = Field( - default=0.15, - description="Weight for appropriate tool usage signal.", - ) - - # --- Dataset --- - dataset_name: Optional[str] = Field( - default=None, - description="HuggingFace dataset with coding tasks. " - "Expected fields: 'task' (problem description) and 'test_code' (pytest/assert tests). 
" - "Falls back to built-in tasks if not set or unavailable.", - ) - - # --- Eval --- - eval_size: int = Field( - default=10, - description="Number of held-out items for evaluation.", - ) - eval_split_ratio: float = Field( - default=0.15, - description="Fraction of dataset to hold out for evaluation.", - ) - - -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• -# Environment -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - -class AgenticOPDEnv(HermesAgentBaseEnv): - """ - RL environment with on-policy distillation from next-state signals. - - Runs coding tasks where the agent writes code and runs tests. - Tool results (test pass/fail, error traces) serve as next-state signals - for hint extraction and teacher logprob scoring. - - This is the first Atropos environment to populate distill_token_ids - and distill_logprobs on ScoredDataGroup for OPD training. - """ - - name = "agentic-opd" - env_config_cls = AgenticOPDConfig - - # Default toolsets: terminal for running code, file for writing it - default_toolsets = ["terminal", "file"] - - @classmethod - def config_init(cls) -> Tuple[AgenticOPDConfig, List[APIServerConfig]]: - """Default configuration.""" - env_config = AgenticOPDConfig( - # Toolsets - enabled_toolsets=["terminal", "file"], - # Agent loop - max_agent_turns=15, - agent_temperature=1.0, - system_prompt=( - "You are a skilled Python programmer. When given a coding task:\n" - "1. Write the solution to a file called 'solution.py'\n" - "2. Write the test code to a file called 'test_solution.py'\n" - "3. Run the tests with: python test_solution.py\n" - "4. 
If tests fail, read the error output carefully, fix your code, and re-run\n" - "5. Once all tests pass, report success\n\n" - "Be efficient โ€” write clean code and fix errors methodically." - ), - # OPD - opd_enabled=True, - distill_topk=50, - prm_votes=3, - # Training - group_size=4, - total_steps=500, - steps_per_eval=50, - use_wandb=True, - wandb_name="agentic-opd", - ) - - server_configs = [ - APIServerConfig( - base_url="http://localhost:8000/v1", - model_name="Qwen/Qwen3-4B", - server_type="vllm", - ) - ] - - return env_config, server_configs - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._items: list[dict] = [] - self._eval_items: list[dict] = [] - self._index: int = 0 - - # Metric buffers - self._reward_buffer: list[float] = [] - self._correctness_buffer: list[float] = [] - self._efficiency_buffer: list[float] = [] - self._tool_usage_buffer: list[float] = [] - self._hints_extracted_buffer: list[int] = [] - self._opd_turns_scored_buffer: list[int] = [] - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 1. 
setup โ€” load dataset - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - async def setup(self) -> None: - """Load coding tasks from HuggingFace or use built-in set.""" - if self.config.dataset_name: - try: - from datasets import load_dataset - - logger.info( - "Loading dataset '%s'...", self.config.dataset_name - ) - ds = load_dataset( - self.config.dataset_name, split=self.config.dataset_split - ) - task_field = self.config.prompt_field - self._items = [ - { - "task": row.get(task_field, row.get("task", "")), - "test_code": row.get("test_code", row.get("tests", "")), - "difficulty": row.get("difficulty", "unknown"), - } - for row in ds - if row.get(task_field, row.get("task", "")) - ] - if self._items: - random.shuffle(self._items) - eval_size = max( - self.config.eval_size, - int(len(self._items) * self.config.eval_split_ratio), - ) - self._eval_items = self._items[:eval_size] - self._items = self._items[eval_size:] - logger.info( - "Loaded %d train / %d eval items from '%s'", - len(self._items), - len(self._eval_items), - self.config.dataset_name, - ) - return - except Exception as e: - logger.warning( - "Could not load dataset '%s': %s. Using built-in tasks.", - self.config.dataset_name, - e, - ) - - # Fallback to built-in tasks - items = copy.deepcopy(BUILTIN_CODING_TASKS) - random.shuffle(items) - split = max(1, len(items) * 85 // 100) - self._items = items[:split] - self._eval_items = items[split:] - logger.info( - "Using built-in coding tasks: %d train / %d eval items", - len(self._items), - len(self._eval_items), - ) - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 2. 
get_next_item - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - async def get_next_item(self) -> dict: - """Return the next coding task, cycling through the dataset.""" - if not self._items: - raise RuntimeError("Dataset is empty. Did you call setup()?") - item = self._items[self._index % len(self._items)] - self._index += 1 - return item - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 3. format_prompt - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - def format_prompt(self, item: dict) -> str: - """Format the coding task as a user prompt.""" - prompt = ( - f"Solve the following coding task.\n\n" - f"## Task\n{item['task']}\n\n" - ) - if item.get("test_code"): - prompt += ( - f"## Tests\nThe following test code will be used to verify your solution:\n" - f"```python\n{item['test_code']}```\n\n" - ) - prompt += ( - "## Instructions\n" - "1. Write your solution to `solution.py`\n" - "2. Write the test code to `test_solution.py`\n" - "3. Run `python test_solution.py` to verify\n" - "4. Fix any failures and re-run until all tests pass\n" - ) - return prompt - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 4. 
compute_reward - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - async def compute_reward( - self, - item: dict, - result: AgentResult, - ctx: ToolContext, - ) -> float: - """ - Multi-signal reward: - - correctness (0.7): Did the tests pass? - - efficiency (0.15): Fewer turns = better - - tool_usage (0.15): Did the agent actually write + run code? - """ - cfg = self.config - - # ---- Signal 1: Test correctness ---- - # Check if test_solution.py exists and passes in the agent's sandbox - correctness = 0.0 - try: - test_result = ctx.terminal("python test_solution.py 2>&1", timeout=30) - output = test_result.get("output", "") - exit_code = test_result.get("exit_code", 1) - if exit_code == 0 and "passed" in output.lower(): - correctness = 1.0 - elif exit_code == 0: - correctness = 0.8 # Ran without error but no explicit "passed" - elif "assert" in output.lower() and "error" in output.lower(): - correctness = 0.2 # Partial โ€” code runs but assertions fail - else: - correctness = 0.1 # Code errors out entirely - except Exception as e: - logger.debug("Test execution failed in reward: %s", e) - correctness = 0.0 - - # ---- Signal 2: Efficiency ---- - max_turns = cfg.max_agent_turns - turns_used = result.turns_used - if turns_used <= 3: - efficiency = 1.0 - elif turns_used <= max_turns // 2: - efficiency = 0.8 - elif turns_used <= max_turns * 3 // 4: - efficiency = 0.5 - else: - efficiency = 0.2 - - # ---- Signal 3: Tool usage ---- - tools_used = set() - for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - fn = tc.get("function", {}) if isinstance(tc, dict) else {} - name = fn.get("name", "") - if name: - tools_used.add(name) - - # Good: used both terminal and file tools - if "terminal" in tools_used and ("write_file" in tools_used or "patch" in 
tools_used): - tool_usage = 1.0 - elif "terminal" in tools_used: - tool_usage = 0.6 - elif tools_used: - tool_usage = 0.3 - else: - tool_usage = 0.0 - - # ---- Combine ---- - reward = ( - cfg.correctness_weight * correctness - + cfg.efficiency_weight * efficiency - + cfg.tool_usage_weight * tool_usage - ) - reward = min(1.0, max(0.0, reward)) - - # Track metrics - self._reward_buffer.append(reward) - self._correctness_buffer.append(correctness) - self._efficiency_buffer.append(efficiency) - self._tool_usage_buffer.append(tool_usage) - - logger.debug( - "Reward: correctness=%.2f, efficiency=%.2f, tool_usage=%.2f โ†’ %.3f", - correctness, - efficiency, - tool_usage, - reward, - ) - return reward - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 5. collect_trajectories โ€” OPD pipeline - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - async def collect_trajectories( - self, item: Item - ) -> Tuple[ - Union[Optional[ScoredDataGroup], List[Optional[ScoredDataGroup]]], - List[Item], - ]: - """ - Override collect_trajectories to add the OPD pipeline. - - 1. Run standard rollouts via super() โ†’ ScoredDataGroup with tokens/masks/scores - 2. For each rollout, extract hints from next-state signals - 3. Score student tokens under enhanced (hint-augmented) distribution - 4. 
Add distill_token_ids / distill_logprobs to the ScoredDataGroup - """ - # Step 1: Run standard rollouts - scored_group, backlog = await super().collect_trajectories(item) - - # Step 2: OPD pipeline (only if enabled and we have VLLM server) - if ( - self.config.opd_enabled - and scored_group is not None - and isinstance(scored_group, dict) - and self._use_managed_server() - ): - await self._apply_opd_pipeline(scored_group) - - return scored_group, backlog - - async def _apply_opd_pipeline(self, group: ScoredDataGroup) -> None: - """ - Apply on-policy distillation to each rollout in the group. - - For each rollout's messages: - 1. Find (assistant, next_state) turn pairs - 2. Extract hints via LLM judge with majority voting - 3. Build enhanced prompt (original + hint) - 4. Score student tokens under enhanced distribution via get_logprobs - 5. Add distill_token_ids / distill_logprobs to the group - """ - messages_list = group.get("messages", []) - tokens_list = group.get("tokens", []) - - if not messages_list or not tokens_list: - logger.debug("OPD: No messages or tokens to process") - return - - all_distill_token_ids: List[Optional[List[List[int]]]] = [] - all_distill_logprobs: List[Optional[List[List[float]]]] = [] - - for seq_idx, (messages, student_tokens) in enumerate( - zip(messages_list, tokens_list) - ): - try: - distill_ids, distill_lps = await self._opd_for_sequence( - messages, student_tokens - ) - all_distill_token_ids.append(distill_ids) - all_distill_logprobs.append(distill_lps) - except Exception as e: - logger.warning( - "OPD failed for sequence %d: %s", seq_idx, e - ) - all_distill_token_ids.append(None) - all_distill_logprobs.append(None) - - # Only set distill fields if at least one sequence succeeded - any_succeeded = any(d is not None for d in all_distill_token_ids) - if any_succeeded: - # Replace None entries with zero-padded arrays matching token length - for i in range(len(all_distill_token_ids)): - if all_distill_token_ids[i] is None and i < 
len(tokens_list): - seq_len = len(tokens_list[i]) - k = self.config.distill_topk - all_distill_token_ids[i] = [[0] * k] * seq_len - all_distill_logprobs[i] = [[0.0] * k] * seq_len - - group["distill_token_ids"] = all_distill_token_ids - group["distill_logprobs"] = all_distill_logprobs - logger.info( - "OPD: Set distill fields on %d/%d sequences", - sum(1 for d in all_distill_token_ids if d is not None), - len(all_distill_token_ids), - ) - - async def _opd_for_sequence( - self, messages: List[Dict], student_tokens: List[int] - ) -> Tuple[List[List[int]], List[List[float]]]: - """ - Run OPD for a single rollout sequence. - - 1. Walk conversation to find (assistant, next_state) pairs - 2. Extract hints from next-state signals - 3. For each hint-augmented turn, score student tokens via get_logprobs - 4. Merge per-turn teacher logprobs into a full-sequence distill array - - Returns: - (distill_token_ids, distill_logprobs) each of shape [seq_len][top_k] - """ - k = self.config.distill_topk - seq_len = len(student_tokens) - - # Initialize with zeros (no distill info = neutral) - distill_token_ids: List[List[int]] = [[0] * k for _ in range(seq_len)] - distill_logprobs: List[List[float]] = [[0.0] * k for _ in range(seq_len)] - - # Find (assistant, next_state) turn pairs - turn_pairs = self._extract_turn_pairs(messages) - if not turn_pairs: - return distill_token_ids, distill_logprobs - - hints_extracted = 0 - turns_scored = 0 - - for pair in turn_pairs: - try: - hint = await self._extract_hint( - pair["assistant_text"], - pair["next_state_text"], - pair["next_state_role"], - ) - if not hint: - continue - - hints_extracted += 1 - - # Build enhanced prompt with hint - enhanced_messages = _append_hint_to_messages( - pair["context_messages"], hint - ) - - # Tokenize the enhanced prompt - if not self.tokenizer: - logger.warning("OPD: No tokenizer available, skipping scoring") - continue - - enhanced_prompt = self.tokenizer.apply_chat_template( - enhanced_messages, - 
tokenize=False, - add_generation_prompt=True, - ) - - # Tokenize the assistant response to score - response_text = pair["assistant_text"] - enhanced_full_text = enhanced_prompt + response_text - enhanced_ids = self.tokenizer( - enhanced_full_text, add_special_tokens=False - )["input_ids"] - - response_ids = self.tokenizer( - response_text, add_special_tokens=False - )["input_ids"] - response_len = len(response_ids) - - if response_len == 0: - continue - - # Score via get_logprobs โ€” teacher scoring the student's tokens - # under the enhanced (hint-augmented) distribution - try: - logprob_result = await self.server.get_logprobs( - input_ids=enhanced_ids, - top_k=k, - split="eval", # Use eval semaphore to not block training - ) - except Exception as e: - logger.debug("get_logprobs failed: %s", e) - continue - - teacher_topk_ids = logprob_result.get("prompt_topk_token_ids", []) - teacher_topk_lps = logprob_result.get("prompt_topk_logprobs", []) - - if not teacher_topk_ids: - continue - - # Extract only the response positions (last response_len entries) - if len(teacher_topk_ids) >= response_len: - resp_topk_ids = teacher_topk_ids[-response_len:] - resp_topk_lps = teacher_topk_lps[-response_len:] - else: - # Pad from the left if the response was shorter than expected - pad_len = response_len - len(teacher_topk_ids) - resp_topk_ids = [[0] * k] * pad_len + teacher_topk_ids - resp_topk_lps = [[0.0] * k] * pad_len + teacher_topk_lps - - # Map these back to the student's full sequence positions - # Find where this assistant turn's tokens appear in the full sequence - turn_start = self._find_token_span( - student_tokens, response_ids - ) - if turn_start is not None: - for j in range(min(response_len, seq_len - turn_start)): - pos = turn_start + j - if pos < seq_len and j < len(resp_topk_ids): - # Pad/truncate to exactly k entries - ids = resp_topk_ids[j][:k] - lps = resp_topk_lps[j][:k] - while len(ids) < k: - ids.append(0) - lps.append(0.0) - distill_token_ids[pos] = ids - 
distill_logprobs[pos] = lps - turns_scored += 1 - - except Exception as e: - logger.debug("OPD turn processing failed: %s", e) - continue - - # Track OPD metrics - self._hints_extracted_buffer.append(hints_extracted) - self._opd_turns_scored_buffer.append(turns_scored) - - logger.debug( - "OPD sequence: %d turn pairs, %d hints extracted, %d turns scored", - len(turn_pairs), - hints_extracted, - turns_scored, - ) - return distill_token_ids, distill_logprobs - - def _extract_turn_pairs( - self, messages: List[Dict] - ) -> List[Dict[str, Any]]: - """ - Walk conversation messages to find (assistant, next_state) pairs. - - A "turn pair" is an assistant message with content (the response) - followed by one or more tool results or a user reply (the next state). - - Returns list of dicts: - { - "context_messages": messages up to (not including) the assistant turn, - "assistant_text": the assistant's response text, - "next_state_text": the next state content (tool result or user reply), - "next_state_role": "tool" or "user", - } - """ - pairs = [] - i = 0 - while i < len(messages): - msg = messages[i] - if msg.get("role") == "assistant" and msg.get("content"): - # Found an assistant message with content - assistant_text = msg["content"] - context = messages[:i] # Everything before this turn - - # Look ahead for next state - j = i + 1 - # Skip tool_calls-only assistant messages and collect tool results - next_states = [] - while j < len(messages): - next_msg = messages[j] - if next_msg.get("role") == "tool": - next_states.append(next_msg) - j += 1 - elif next_msg.get("role") == "user": - next_states.append(next_msg) - break - else: - break - - if next_states: - # Combine all next-state content - next_text_parts = [] - next_role = next_states[0].get("role", "tool") - for ns in next_states: - content = ns.get("content", "") - if content: - # Truncate very long tool outputs - max_chars = self.config.hint_max_next_state_chars - if len(content) > max_chars: - content = 
content[:max_chars] + "\n...[truncated]" - next_text_parts.append(content) - - next_text = "\n---\n".join(next_text_parts) - if next_text.strip(): - pairs.append( - { - "context_messages": context, - "assistant_text": assistant_text, - "next_state_text": next_text, - "next_state_role": next_role, - } - ) - i += 1 - return pairs - - async def _extract_hint( - self, - assistant_text: str, - next_state_text: str, - next_state_role: str, - ) -> Optional[str]: - """ - Extract a hindsight hint from a next-state signal using majority-voted LLM judge. - - Returns the hint string if the judge votes positively, None otherwise. - """ - judge_messages = _build_hint_judge_messages( - response_text=assistant_text, - next_state_text=next_state_text, - next_state_role=next_state_role, - ) - - # Majority voting across multiple judge queries - votes = [] - tasks = [] - for _ in range(self.config.prm_votes): - tasks.append( - self.server.chat_completion( - messages=judge_messages, - n=1, - max_tokens=500, - temperature=0.7, - split="eval", - ) - ) - - results = await asyncio.gather(*tasks, return_exceptions=True) - - for result in results: - if isinstance(result, Exception): - logger.debug("Hint judge call failed: %s", result) - votes.append({"score": None, "hint": ""}) - continue - try: - text = result.choices[0].message.content or "" - score, hint = _parse_hint_result(text) - votes.append({"score": score, "hint": hint}) - except Exception as e: - logger.debug("Hint parse failed: %s", e) - votes.append({"score": None, "hint": ""}) - - selected = _select_best_hint(votes) - if selected is None: - return None - return selected["hint"] - - @staticmethod - def _find_token_span( - full_tokens: List[int], sub_tokens: List[int] - ) -> Optional[int]: - """ - Find where sub_tokens appears in full_tokens. - Returns the start index, or None if not found. - - Uses a sliding window search. For long sequences, searches - from the end since assistant responses are typically at the end. 
- """ - if not sub_tokens or not full_tokens: - return None - sub_len = len(sub_tokens) - full_len = len(full_tokens) - if sub_len > full_len: - return None - - # Search backwards (assistant responses are usually near the end) - for i in range(full_len - sub_len, -1, -1): - if full_tokens[i : i + sub_len] == sub_tokens: - return i - return None - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 6. evaluate - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - async def evaluate(self, *args, **kwargs) -> None: - """ - Evaluate on held-out coding tasks using the full agent loop. - No OPD during eval โ€” just standard agentic evaluation. - """ - if not self._eval_items: - logger.warning("No eval items available.") - return - - eval_size = min(self.config.eval_size, len(self._eval_items)) - eval_items = self._eval_items[:eval_size] - - logger.info("Running eval on %d coding tasks...", len(eval_items)) - start_time = time.time() - samples = [] - - tools, valid_names = self._resolve_tools_for_group() - - for i, item in enumerate(eval_items): - task_id = str(uuid.uuid4()) - logger.info( - "Eval [%d/%d]: %s...", i + 1, len(eval_items), item["task"][:60] - ) - - try: - messages: List[Dict[str, Any]] = [] - if self.config.system_prompt: - messages.append( - {"role": "system", "content": self.config.system_prompt} - ) - messages.append( - {"role": "user", "content": self.format_prompt(item)} - ) - - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=0.0, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - 
budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - - # Compute reward (track buffer lengths to rollback eval pollution) - buf_len = len(self._correctness_buffer) - ctx = ToolContext(task_id) - try: - reward = await self.compute_reward(item, result, ctx) - finally: - ctx.cleanup() - - # Extract correctness and rollback training buffers - correctness = ( - self._correctness_buffer[buf_len] - if len(self._correctness_buffer) > buf_len - else 0.0 - ) - for buf in ( - self._reward_buffer, - self._correctness_buffer, - self._efficiency_buffer, - self._tool_usage_buffer, - ): - if len(buf) > buf_len: - buf.pop() - - # Also rollback OPD buffers if they were touched - for buf in ( - self._hints_extracted_buffer, - self._opd_turns_scored_buffer, - ): - if len(buf) > buf_len: - buf.pop() - - # Extract final response - final_response = "" - for msg in reversed(result.messages): - if ( - msg.get("role") == "assistant" - and msg.get("content") - and not final_response - ): - final_response = msg["content"] - break - - samples.append( - { - "prompt": item["task"][:200], - "response": final_response[:500], - "correctness": correctness, - "reward": reward, - "turns": result.turns_used, - } - ) - - logger.info( - " โ†’ correctness=%.2f, reward=%.3f, turns=%d", - correctness, - reward, - result.turns_used, - ) - - except Exception as e: - logger.error("Eval error: %s", e) - samples.append( - { - "prompt": item["task"][:200], - "response": f"ERROR: {e}", - "correctness": 0.0, - "reward": 0.0, - "turns": 0, - } - ) - - end_time = time.time() - - correctness_scores = [s["correctness"] for s in samples] - rewards = [s["reward"] for s in samples] - n = len(samples) - - eval_metrics = { - "eval/mean_correctness": sum(correctness_scores) / n if n else 0.0, - "eval/mean_reward": sum(rewards) / n if n else 0.0, - "eval/pass_rate": ( - sum(1 for c in correctness_scores if c >= 0.8) / n if n else 0.0 - ), - "eval/n_items": n, - } - - logger.info( - "Eval 
complete โ€” correctness=%.3f, reward=%.3f, pass_rate=%.0f%%", - eval_metrics["eval/mean_correctness"], - eval_metrics["eval/mean_reward"], - eval_metrics["eval/pass_rate"] * 100, - ) - - await self.evaluate_log( - metrics=eval_metrics, - samples=samples, - start_time=start_time, - end_time=end_time, - ) - - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - # 7. wandb_log โ€” custom OPD metrics - # โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None) -> None: - """Log reward breakdown and OPD-specific metrics to wandb.""" - if wandb_metrics is None: - wandb_metrics = {} - - if self._reward_buffer: - n = len(self._reward_buffer) - wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n - wandb_metrics["train/mean_correctness"] = ( - sum(self._correctness_buffer) / n - ) - wandb_metrics["train/mean_efficiency"] = ( - sum(self._efficiency_buffer) / n - ) - wandb_metrics["train/mean_tool_usage"] = ( - sum(self._tool_usage_buffer) / n - ) - wandb_metrics["train/pass_rate"] = ( - sum(1 for c in self._correctness_buffer if c >= 0.8) / n - ) - wandb_metrics["train/total_rollouts"] = n - - self._reward_buffer.clear() - self._correctness_buffer.clear() - self._efficiency_buffer.clear() - self._tool_usage_buffer.clear() - - # OPD-specific metrics - if self._hints_extracted_buffer: - n = len(self._hints_extracted_buffer) - wandb_metrics["opd/mean_hints_per_rollout"] = ( - sum(self._hints_extracted_buffer) / n - ) - wandb_metrics["opd/mean_turns_scored"] = ( - sum(self._opd_turns_scored_buffer) / n - ) - wandb_metrics["opd/hint_rate"] = ( - sum(1 for h in self._hints_extracted_buffer if h > 0) / n - ) 
- wandb_metrics["opd/total_hints"] = sum(self._hints_extracted_buffer) - wandb_metrics["opd/total_scored_turns"] = sum( - self._opd_turns_scored_buffer - ) - - self._hints_extracted_buffer.clear() - self._opd_turns_scored_buffer.clear() - - await super().wandb_log(wandb_metrics) - - -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• -# Entry point -# โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ• - -if __name__ == "__main__": - AgenticOPDEnv.cli() diff --git a/environments/benchmarks/tblite/README.md b/environments/benchmarks/tblite/README.md deleted file mode 100644 index 54b3745c3835..000000000000 --- a/environments/benchmarks/tblite/README.md +++ /dev/null @@ -1,73 +0,0 @@ -# OpenThoughts-TBLite Evaluation Environment - -This environment evaluates terminal agents on the [OpenThoughts-TBLite](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TBLite) benchmark, a difficulty-calibrated subset of [Terminal-Bench 2.0](https://www.tbench.ai/leaderboard/terminal-bench/2.0). - -## Source - -OpenThoughts-TBLite was created by the [OpenThoughts](https://www.openthoughts.ai/) Agent team in collaboration with [Snorkel AI](https://snorkel.ai/) and [Bespoke Labs](https://bespokelabs.ai/). 
The original dataset and documentation live at: - -- **Dataset (source):** [open-thoughts/OpenThoughts-TBLite](https://huggingface.co/datasets/open-thoughts/OpenThoughts-TBLite) -- **GitHub:** [open-thoughts/OpenThoughts-TBLite](https://github.com/open-thoughts/OpenThoughts-TBLite) -- **Blog post:** [openthoughts.ai/blog/openthoughts-tblite](https://www.openthoughts.ai/blog/openthoughts-tblite) - -## Our Dataset - -We converted the source into the same schema used by our Terminal-Bench 2.0 environment (pre-built Docker Hub images, base64-encoded test tarballs, etc.) and published it as: - -- **Dataset (ours):** [NousResearch/openthoughts-tblite](https://huggingface.co/datasets/NousResearch/openthoughts-tblite) -- **Docker images:** `nousresearch/tblite-:latest` on Docker Hub (100 images) - -The conversion script is at `scripts/prepare_tblite_dataset.py`. - -## Why TBLite? - -Terminal-Bench 2.0 is one of the strongest frontier evaluations for terminal agents, but when a model scores near the floor (e.g., Qwen 3 8B at <1%), many changes look identical in aggregate score. TBLite addresses this by calibrating task difficulty using Claude Haiku 4.5 as a reference: - -| Difficulty | Pass Rate Range | Tasks | -|------------|----------------|-------| -| Easy | >= 70% | 40 | -| Medium | 40-69% | 26 | -| Hard | 10-39% | 26 | -| Extreme | < 10% | 8 | - -This gives enough solvable tasks to detect small improvements quickly, while preserving enough hard tasks to avoid saturation. The correlation between TBLite and TB2 scores is **r = 0.911**. - -TBLite also runs 2.6-8x faster than the full TB2, making it practical for iteration loops. 
- -## Usage - -```bash -# Run the full benchmark -python environments/benchmarks/tblite/tblite_env.py evaluate - -# Filter to specific tasks -python environments/benchmarks/tblite/tblite_env.py evaluate \ - --env.task_filter "broken-python,pandas-etl" - -# Use a different model -python environments/benchmarks/tblite/tblite_env.py evaluate \ - --server.model_name "qwen/qwen3-30b" -``` - -## Architecture - -`TBLiteEvalEnv` is a thin subclass of `TerminalBench2EvalEnv`. All evaluation logic (agent loop, Docker sandbox management, test verification, metrics) is inherited. Only the defaults differ: - -| Setting | TB2 | TBLite | -|----------------|----------------------------------|-----------------------------------------| -| Dataset | `NousResearch/terminal-bench-2` | `NousResearch/openthoughts-tblite` | -| Tasks | 89 | 100 | -| Task timeout | 1800s (30 min) | 1200s (20 min) | -| Wandb name | `terminal-bench-2` | `openthoughts-tblite` | - -## Citation - -```bibtex -@software{OpenThoughts-TBLite, - author = {OpenThoughts-Agent team, Snorkel AI, Bespoke Labs}, - month = Feb, - title = {{OpenThoughts-TBLite: A High-Signal Benchmark for Iterating on Terminal Agents}}, - howpublished = {https://www.openthoughts.ai/blog/openthoughts-tblite}, - year = {2026} -} -``` diff --git a/environments/benchmarks/tblite/__init__.py b/environments/benchmarks/tblite/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/environments/benchmarks/tblite/default.yaml b/environments/benchmarks/tblite/default.yaml deleted file mode 100644 index cb5218280612..000000000000 --- a/environments/benchmarks/tblite/default.yaml +++ /dev/null @@ -1,39 +0,0 @@ -# OpenThoughts-TBLite Evaluation -- Default Configuration -# -# Eval-only environment for the TBLite benchmark (100 difficulty-calibrated -# terminal tasks, a faster proxy for Terminal-Bench 2.0). -# Uses Modal terminal backend for per-task cloud-isolated sandboxes -# and OpenRouter for inference. 
-# -# Usage: -# python environments/benchmarks/tblite/tblite_env.py evaluate \ -# --config environments/benchmarks/tblite/default.yaml -# -# # Override model: -# python environments/benchmarks/tblite/tblite_env.py evaluate \ -# --config environments/benchmarks/tblite/default.yaml \ -# --openai.model_name anthropic/claude-sonnet-4 - -env: - enabled_toolsets: ["terminal", "file"] - max_agent_turns: 60 - max_token_length: 32000 - agent_temperature: 0.8 - terminal_backend: "modal" - terminal_timeout: 300 # 5 min per command (builds, pip install) - tool_pool_size: 128 # thread pool for 100 parallel tasks - dataset_name: "NousResearch/openthoughts-tblite" - test_timeout: 600 - task_timeout: 1200 # 20 min wall-clock per task (TBLite tasks are faster) - tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B" - use_wandb: true - wandb_name: "openthoughts-tblite" - ensure_scores_are_not_same: false - data_dir_to_save_evals: "environments/benchmarks/evals/openthoughts-tblite" - -openai: - base_url: "https://openrouter.ai/api/v1" - model_name: "anthropic/claude-opus-4.6" - server_type: "openai" - health_check: false - # api_key loaded from OPENROUTER_API_KEY in .env diff --git a/environments/benchmarks/tblite/local.yaml b/environments/benchmarks/tblite/local.yaml deleted file mode 100644 index 35d4b8968698..000000000000 --- a/environments/benchmarks/tblite/local.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# OpenThoughts-TBLite Evaluation -- Docker Backend (Local Compute) -# -# Runs tasks in Docker containers on the local machine. -# Sandboxed like Modal but no cloud costs. Good for dev/testing. 
-# -# Usage: -# python environments/benchmarks/tblite/tblite_env.py evaluate \ -# --config environments/benchmarks/tblite/local.yaml -# -# # Override concurrency: -# python environments/benchmarks/tblite/tblite_env.py evaluate \ -# --config environments/benchmarks/tblite/local.yaml \ -# --env.eval_concurrency 4 - -env: - enabled_toolsets: ["terminal", "file"] - max_agent_turns: 60 - max_token_length: 32000 - agent_temperature: 0.8 - terminal_backend: "docker" - terminal_timeout: 300 - tool_pool_size: 16 - dataset_name: "NousResearch/openthoughts-tblite" - test_timeout: 600 - task_timeout: 1200 - eval_concurrency: 8 # max 8 tasks at once - tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B" - use_wandb: false - wandb_name: "openthoughts-tblite-local" - ensure_scores_are_not_same: false - data_dir_to_save_evals: "environments/benchmarks/evals/openthoughts-tblite-local" - -openai: - base_url: "https://openrouter.ai/api/v1" - model_name: "anthropic/claude-sonnet-4" - server_type: "openai" - health_check: false - # api_key loaded from OPENROUTER_API_KEY in .env diff --git a/environments/benchmarks/tblite/local_vllm.yaml b/environments/benchmarks/tblite/local_vllm.yaml deleted file mode 100644 index 17689ba1d35f..000000000000 --- a/environments/benchmarks/tblite/local_vllm.yaml +++ /dev/null @@ -1,40 +0,0 @@ -# OpenThoughts-TBLite Evaluation -- Local vLLM Backend -# -# Runs against a local vLLM server with Docker sandboxes. 
-# -# Start the vLLM server from the atropos directory: -# python -m example_trainer.vllm_api_server \ -# --model Qwen/Qwen3-4B-Instruct-2507 \ -# --port 9001 \ -# --gpu-memory-utilization 0.8 \ -# --max-model-len=32000 -# -# Then run: -# python environments/benchmarks/tblite/tblite_env.py evaluate \ -# --config environments/benchmarks/tblite/local_vllm.yaml - -env: - enabled_toolsets: ["terminal", "file"] - max_agent_turns: 60 - max_token_length: 16000 - agent_temperature: 0.6 - terminal_backend: "docker" - terminal_timeout: 300 - tool_pool_size: 16 - dataset_name: "NousResearch/openthoughts-tblite" - test_timeout: 600 - task_timeout: 1200 - eval_concurrency: 8 - tool_call_parser: "hermes" - system_prompt: "You are an expert terminal agent. You MUST use the provided tools to complete tasks. Use the terminal tool to run shell commands, read_file to read files, write_file to write files, search_files to search, and patch to edit files. Do NOT write out solutions as text - execute them using the tools. Always start by exploring the environment with terminal commands." 
- tokenizer_name: "Qwen/Qwen3-4B-Instruct-2507" - use_wandb: false - wandb_name: "tblite-qwen3-4b-instruct" - ensure_scores_are_not_same: false - data_dir_to_save_evals: "environments/benchmarks/evals/tblite-qwen3-4b-local" - -openai: - base_url: "http://localhost:9001" - model_name: "Qwen/Qwen3-4B-Instruct-2507" - server_type: "vllm" - health_check: false diff --git a/environments/benchmarks/tblite/run_eval.sh b/environments/benchmarks/tblite/run_eval.sh deleted file mode 100755 index 9d860bf5ef79..000000000000 --- a/environments/benchmarks/tblite/run_eval.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -# OpenThoughts-TBLite Evaluation -# -# Run from repo root: -# bash environments/benchmarks/tblite/run_eval.sh -# -# Override model: -# bash environments/benchmarks/tblite/run_eval.sh \ -# --openai.model_name anthropic/claude-sonnet-4 -# -# Run a subset: -# bash environments/benchmarks/tblite/run_eval.sh \ -# --env.task_filter broken-python,pandas-etl -# -# All terminal settings (backend, timeout, lifetime, pool size) are -# configured via env config fields -- no env vars needed. 
- -set -euo pipefail - -mkdir -p logs evals/openthoughts-tblite -LOG_FILE="logs/tblite_$(date +%Y%m%d_%H%M%S).log" - -echo "OpenThoughts-TBLite Evaluation" -echo "Log file: $LOG_FILE" -echo "" - -# Unbuffered python output so logs are written in real-time -export PYTHONUNBUFFERED=1 - -# Show INFO-level agent loop timing (api/tool durations per turn) -# These go to the log file; tqdm + [START]/[PASS]/[FAIL] go to terminal -export LOGLEVEL=INFO - -python tblite_env.py evaluate \ - --config default.yaml \ - "$@" \ - 2>&1 | tee "$LOG_FILE" - -echo "" -echo "Log saved to: $LOG_FILE" -echo "Eval results: evals/openthoughts-tblite/" diff --git a/environments/benchmarks/tblite/tblite_env.py b/environments/benchmarks/tblite/tblite_env.py deleted file mode 100644 index 4b23f9cc5583..000000000000 --- a/environments/benchmarks/tblite/tblite_env.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -OpenThoughts-TBLite Evaluation Environment - -A lighter, faster alternative to Terminal-Bench 2.0 for iterating on terminal -agents. Uses the same evaluation logic as TerminalBench2EvalEnv but defaults -to the NousResearch/openthoughts-tblite dataset (100 difficulty-calibrated -tasks vs TB2's 89 harder tasks). 
- -TBLite tasks are a curated subset of TB2 with a difficulty distribution -designed to give meaningful signal even for smaller models: - - Easy (40 tasks): >= 70% pass rate with Claude Haiku 4.5 - - Medium (26 tasks): 40-69% pass rate - - Hard (26 tasks): 10-39% pass rate - - Extreme (8 tasks): < 10% pass rate - -Usage: - python environments/benchmarks/tblite/tblite_env.py evaluate - - # Filter to specific tasks: - python environments/benchmarks/tblite/tblite_env.py evaluate \\ - --env.task_filter "broken-python,pandas-etl" -""" - -import os -import sys -from pathlib import Path -from typing import List, Tuple - -_repo_root = Path(__file__).resolve().parent.parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from pydantic import Field - -from atroposlib.envs.base import EvalHandlingEnum -from atroposlib.envs.server_handling.server_manager import APIServerConfig - -from environments.benchmarks.terminalbench_2.terminalbench2_env import ( - TerminalBench2EvalConfig, - TerminalBench2EvalEnv, -) - - -class TBLiteEvalConfig(TerminalBench2EvalConfig): - """Configuration for the OpenThoughts-TBLite evaluation environment. - - Inherits all TB2 config fields. Only the dataset default and task timeout - differ -- TBLite tasks are calibrated to be faster. - """ - - dataset_name: str = Field( - default="NousResearch/openthoughts-tblite", - description="HuggingFace dataset containing TBLite tasks.", - ) - - task_timeout: int = Field( - default=1200, - description="Maximum wall-clock seconds per task. TBLite tasks are " - "generally faster than TB2, so 20 minutes is usually sufficient.", - ) - - -class TBLiteEvalEnv(TerminalBench2EvalEnv): - """OpenThoughts-TBLite evaluation environment. - - Inherits all evaluation logic from TerminalBench2EvalEnv (agent loop, - test verification, Docker image resolution, metrics, wandb logging). - Only the default configuration differs. 
- """ - - name = "openthoughts-tblite" - env_config_cls = TBLiteEvalConfig - - @classmethod - def config_init(cls) -> Tuple[TBLiteEvalConfig, List[APIServerConfig]]: - env_config = TBLiteEvalConfig( - enabled_toolsets=["terminal", "file"], - disabled_toolsets=None, - distribution=None, - - max_agent_turns=60, - max_token_length=16000, - agent_temperature=0.6, - system_prompt=None, - - terminal_backend="modal", - terminal_timeout=300, - - test_timeout=180, - - # 100 tasks in parallel - tool_pool_size=128, - - eval_handling=EvalHandlingEnum.STOP_TRAIN, - group_size=1, - steps_per_eval=1, - total_steps=1, - - tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B", - use_wandb=True, - wandb_name="openthoughts-tblite", - ensure_scores_are_not_same=False, - ) - - server_configs = [ - APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name="anthropic/claude-sonnet-4", - server_type="openai", - api_key=os.getenv("OPENROUTER_API_KEY", ""), - health_check=False, - ) - ] - - return env_config, server_configs - - -if __name__ == "__main__": - TBLiteEvalEnv.cli() diff --git a/environments/benchmarks/terminalbench_2/__init__.py b/environments/benchmarks/terminalbench_2/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/environments/benchmarks/terminalbench_2/default.yaml b/environments/benchmarks/terminalbench_2/default.yaml deleted file mode 100644 index eb675b12e70d..000000000000 --- a/environments/benchmarks/terminalbench_2/default.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Terminal-Bench 2.0 Evaluation -- Default Configuration -# -# Eval-only environment for the TB2 benchmark (89 terminal tasks). -# Uses Modal terminal backend for per-task cloud-isolated sandboxes -# and OpenRouter for inference. 
-# -# Usage: -# python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ -# --config environments/benchmarks/terminalbench_2/default.yaml -# -# # Override model: -# python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ -# --config environments/benchmarks/terminalbench_2/default.yaml \ -# --openai.model_name anthropic/claude-sonnet-4 - -env: - enabled_toolsets: ["terminal", "file"] - max_agent_turns: 60 - max_token_length: 32000 - agent_temperature: 0.8 - terminal_backend: "modal" - terminal_timeout: 300 # 5 min per command (builds, pip install) - tool_pool_size: 128 # thread pool for 89 parallel tasks - dataset_name: "NousResearch/terminal-bench-2" - test_timeout: 600 - task_timeout: 1800 # 30 min wall-clock per task, auto-FAIL if exceeded - tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B" - use_wandb: true - wandb_name: "terminal-bench-2" - ensure_scores_are_not_same: false - data_dir_to_save_evals: "environments/benchmarks/evals/terminal-bench-2" - # CRITICAL: Limit concurrent Modal sandbox creations to avoid deadlocks. - # Modal's blocking calls (App.lookup, etc.) deadlock when too many sandboxes - # are created simultaneously inside thread pool workers via asyncio.run(). 
- max_concurrent_tasks: 8 - -openai: - base_url: "https://openrouter.ai/api/v1" - model_name: "anthropic/claude-opus-4.6" - server_type: "openai" - health_check: false - # api_key loaded from OPENROUTER_API_KEY in .env diff --git a/environments/benchmarks/terminalbench_2/run_eval.sh b/environments/benchmarks/terminalbench_2/run_eval.sh deleted file mode 100755 index ffbe48480658..000000000000 --- a/environments/benchmarks/terminalbench_2/run_eval.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -# Terminal-Bench 2.0 Evaluation -# -# Run from repo root: -# bash environments/benchmarks/terminalbench_2/run_eval.sh -# -# Override model: -# bash environments/benchmarks/terminalbench_2/run_eval.sh \ -# --openai.model_name anthropic/claude-sonnet-4 -# -# Run a subset: -# bash environments/benchmarks/terminalbench_2/run_eval.sh \ -# --env.task_filter fix-git,git-multibranch -# -# All terminal settings (backend, timeout, lifetime, pool size) are -# configured via env config fields -- no env vars needed. 
- -set -euo pipefail - -mkdir -p logs evals/terminal-bench-2 -LOG_FILE="logs/terminalbench2_$(date +%Y%m%d_%H%M%S).log" - -echo "Terminal-Bench 2.0 Evaluation" -echo "Log file: $LOG_FILE" -echo "" - -# Unbuffered python output so logs are written in real-time -export PYTHONUNBUFFERED=1 - -# Show INFO-level agent loop timing (api/tool durations per turn) -# These go to the log file; tqdm + [START]/[PASS]/[FAIL] go to terminal -export LOGLEVEL=INFO - -python terminalbench2_env.py evaluate \ - --config default.yaml \ - "$@" \ - 2>&1 | tee "$LOG_FILE" - -echo "" -echo "Log saved to: $LOG_FILE" -echo "Eval results: evals/terminal-bench-2/" diff --git a/environments/benchmarks/terminalbench_2/terminalbench2_env.py b/environments/benchmarks/terminalbench_2/terminalbench2_env.py deleted file mode 100644 index 1a76b8da61e1..000000000000 --- a/environments/benchmarks/terminalbench_2/terminalbench2_env.py +++ /dev/null @@ -1,1016 +0,0 @@ -""" -TerminalBench2Env -- Terminal-Bench 2.0 Evaluation Environment - -Evaluates agentic LLMs on challenging terminal tasks from Terminal-Bench 2.0. -Each task provides a unique Docker environment (pre-built on Docker Hub), a natural -language instruction, and a test suite for verification. The agent uses terminal + -file tools to complete the task, then the test suite runs inside the same sandbox. - -This is an eval-only environment (not a training environment). It is designed to -be run via the `evaluate` subcommand: - - python environments/terminalbench2_env.py evaluate \\ - --env.dataset_name NousResearch/terminal-bench-2 - -The evaluate flow: - 1. setup() -- Loads the TB2 dataset from HuggingFace - 2. evaluate() -- Iterates over all tasks, running each through: - a. 
rollout_and_score_eval() -- Per-task agent loop + test verification - - Resolves Docker image (pre-built Hub image or Dockerfile fallback) - - Registers per-task Modal sandbox via register_task_env_overrides() - - Runs the HermesAgentLoop (terminal + file tools) - - Uploads test suite and runs test.sh in the same sandbox - - Returns binary pass/fail result - b. Aggregates per-task, per-category, and overall pass rates - c. Logs results via evaluate_log() and wandb - -Key features: - - Per-task Modal sandboxes using pre-built Docker Hub images - - Binary reward: 1.0 if all tests pass, 0.0 otherwise - - Concurrency-controlled parallel evaluation via asyncio.Semaphore - - Per-task, per-category, and aggregate pass rate tracking -""" - -import asyncio -import base64 -import io -import json -import logging -import os -import shutil -import sys -import tarfile -import tempfile -import time -import uuid -from collections import defaultdict -from pathlib import Path, PurePosixPath, PureWindowsPath -from typing import Any, Dict, List, Optional, Tuple, Union - -# Ensure repo root is on sys.path for imports -_repo_root = Path(__file__).resolve().parent.parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from pydantic import Field - -from atroposlib.envs.base import EvalHandlingEnum -from atroposlib.envs.server_handling.server_manager import APIServerConfig - -from environments.agent_loop import AgentResult, HermesAgentLoop -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -from environments.tool_context import ToolContext -from tools.terminal_tool import ( - register_task_env_overrides, - clear_task_env_overrides, - cleanup_vm, -) - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# Configuration -# ============================================================================= - -class 
TerminalBench2EvalConfig(HermesAgentEnvConfig): - """ - Configuration for the Terminal-Bench 2.0 evaluation environment. - - Extends HermesAgentEnvConfig with TB2-specific settings for dataset loading, - test execution, task filtering, and eval concurrency. - """ - - # --- Dataset --- - dataset_name: str = Field( - default="NousResearch/terminal-bench-2", - description="HuggingFace dataset containing TB2 tasks.", - ) - - # --- Test execution --- - test_timeout: int = Field( - default=180, - description="Timeout in seconds for running the test suite after agent completes.", - ) - - # --- Image strategy --- - force_build: bool = Field( - default=False, - description="If True, always build from Dockerfile (ignore docker_image). " - "Useful for testing custom Dockerfiles.", - ) - - # --- Task filtering (comma-separated from CLI) --- - task_filter: Optional[str] = Field( - default=None, - description="Comma-separated task names to run (e.g., 'fix-git,git-multibranch'). " - "If not set, all tasks are run.", - ) - skip_tasks: Optional[str] = Field( - default=None, - description="Comma-separated task names to skip on top of the default skip list.", - ) - - # --- Per-task wall-clock timeout --- - task_timeout: int = Field( - default=1800, - description="Maximum wall-clock seconds per task (agent loop + verification). " - "Tasks exceeding this are scored as FAIL. Default 30 minutes.", - ) - - # --- Concurrency control --- - max_concurrent_tasks: int = Field( - default=8, - description="Maximum number of tasks to run concurrently. " - "Limits concurrent Modal sandbox creations to avoid async/threading deadlocks. " - "Modal has internal limits and creating too many sandboxes simultaneously " - "causes blocking calls to deadlock inside the thread pool.", - ) - - # --- Eval concurrency --- - eval_concurrency: int = Field( - default=0, - description="Maximum number of tasks to evaluate in parallel. " - "0 means unlimited (all tasks run concurrently). 
" - "Set to 8 for local backends to avoid overwhelming the machine.", - ) - - -# Tasks that cannot run properly on Modal and are excluded from scoring. -MODAL_INCOMPATIBLE_TASKS = { - "qemu-startup", # Needs KVM/hardware virtualization - "qemu-alpine-ssh", # Needs KVM/hardware virtualization - "crack-7z-hash", # Password brute-force -- too slow for cloud sandbox timeouts -} - - -# ============================================================================= -# Tar extraction helper -# ============================================================================= - -def _normalize_tar_member_parts(member_name: str) -> list: - """Return safe path components for a tar member or raise ValueError.""" - normalized_name = member_name.replace("\\", "/") - posix_path = PurePosixPath(normalized_name) - windows_path = PureWindowsPath(member_name) - - if ( - not normalized_name - or posix_path.is_absolute() - or windows_path.is_absolute() - or windows_path.drive - ): - raise ValueError(f"Unsafe archive member path: {member_name}") - - parts = [part for part in posix_path.parts if part not in {"", "."}] - if not parts or any(part == ".." 
for part in parts): - raise ValueError(f"Unsafe archive member path: {member_name}") - return parts - - -def _safe_extract_tar(tar: tarfile.TarFile, target_dir: Path) -> None: - """Extract a tar archive without allowing traversal or link entries.""" - target_dir.mkdir(parents=True, exist_ok=True) - target_root = target_dir.resolve() - - for member in tar.getmembers(): - parts = _normalize_tar_member_parts(member.name) - target = target_dir.joinpath(*parts) - target_real = target.resolve(strict=False) - - try: - target_real.relative_to(target_root) - except ValueError as exc: - raise ValueError(f"Unsafe archive member path: {member.name}") from exc - - if member.isdir(): - target_real.mkdir(parents=True, exist_ok=True) - continue - - if not member.isfile(): - raise ValueError(f"Unsupported archive member type: {member.name}") - - target_real.parent.mkdir(parents=True, exist_ok=True) - extracted = tar.extractfile(member) - if extracted is None: - raise ValueError(f"Cannot read archive member: {member.name}") - - with extracted, open(target_real, "wb") as dst: - shutil.copyfileobj(extracted, dst) - - try: - os.chmod(target_real, member.mode & 0o777) - except OSError: - pass - - -def _extract_base64_tar(b64_data: str, target_dir: Path): - """Extract a base64-encoded tar.gz archive into target_dir.""" - if not b64_data: - return - raw = base64.b64decode(b64_data) - buf = io.BytesIO(raw) - with tarfile.open(fileobj=buf, mode="r:gz") as tar: - _safe_extract_tar(tar, target_dir) - - -# ============================================================================= -# Main Environment -# ============================================================================= - -class TerminalBench2EvalEnv(HermesAgentBaseEnv): - """ - Terminal-Bench 2.0 evaluation environment (eval-only, no training). 
- - Inherits from HermesAgentBaseEnv for: - - Terminal backend setup (os.environ["TERMINAL_ENV"]) - - Tool resolution via _resolve_tools_for_group() - - Monkey patches for async-safe tool operation - - Wandb trajectory formatting - - The evaluate flow (triggered by `environment.py evaluate`): - 1. setup() -- Load dataset from HuggingFace - 2. evaluate() -- Run all tasks through rollout_and_score_eval() - - Each task in rollout_and_score_eval(): - 1. Resolve Docker image (pre-built Hub image or Dockerfile fallback) - 2. Register per-task Modal sandbox override - 3. Run HermesAgentLoop with terminal + file tools - 4. Upload test suite and execute test.sh in the same sandbox - 5. Check /logs/verifier/reward.txt for pass/fail - 6. Clean up sandbox, overrides, and temp files - """ - - name = "terminal-bench-2" - env_config_cls = TerminalBench2EvalConfig - - @classmethod - def config_init(cls) -> Tuple[TerminalBench2EvalConfig, List[APIServerConfig]]: - """ - Default configuration for Terminal-Bench 2.0 evaluation. - - Uses eval-only settings: - - eval_handling=STOP_TRAIN so the eval flow runs cleanly - - steps_per_eval=1, total_steps=1 so eval triggers immediately - - group_size=1 (one rollout per group, each task is expensive) - - Uses Modal terminal backend (cloud-isolated sandbox per task) and - OpenRouter with Claude for inference. - """ - env_config = TerminalBench2EvalConfig( - # Terminal + file tools only (the agent interacts via shell commands) - enabled_toolsets=["terminal", "file"], - disabled_toolsets=None, - distribution=None, - - # Agent settings -- TB2 tasks are complex, need many turns - max_agent_turns=60, - max_token_length=16000, - agent_temperature=0.6, - system_prompt=None, - - # Modal backend for per-task cloud-isolated sandboxes - terminal_backend="modal", - terminal_timeout=300, # 5 min per command (builds, pip install, etc.) 
- - # Test execution timeout (TB2 test scripts can install deps like pytest) - test_timeout=180, - - # 89 tasks run in parallel, each needs a thread for tool calls - tool_pool_size=128, - - # --- Eval-only Atropos settings --- - # These settings make the env work as an eval-only environment: - # - STOP_TRAIN: pauses training during eval (standard for eval envs) - # - steps_per_eval=1, total_steps=1: eval triggers immediately - # - group_size=1: one rollout per group (each task is expensive) - eval_handling=EvalHandlingEnum.STOP_TRAIN, - group_size=1, - steps_per_eval=1, - total_steps=1, - - tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B", - use_wandb=True, - wandb_name="terminal-bench-2", - ensure_scores_are_not_same=False, # Binary rewards may all be 0 or 1 - ) - - # OpenRouter with Claude -- API key loaded from .env - server_configs = [ - APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name="anthropic/claude-sonnet-4", - server_type="openai", - api_key=os.getenv("OPENROUTER_API_KEY", ""), - health_check=False, - ) - ] - - return env_config, server_configs - - # ========================================================================= - # Setup -- load dataset - # ========================================================================= - - async def setup(self): - """Load the Terminal-Bench 2.0 dataset from HuggingFace.""" - from datasets import load_dataset - - # Auto-set terminal_lifetime to task_timeout + 120s so sandboxes - # never get killed during an active task, but still get cleaned up - # promptly after the task times out. 
- lifetime = self.config.task_timeout + 120 - self.config.terminal_lifetime = lifetime - os.environ["TERMINAL_LIFETIME_SECONDS"] = str(lifetime) - print(f" Terminal lifetime auto-set to {lifetime}s (task_timeout + 120s)") - - print(f"Loading TB2 dataset from: {self.config.dataset_name}") - ds = load_dataset(self.config.dataset_name, split="train") - - # Apply task filters (comma-separated strings from CLI) - tasks = list(ds) - if self.config.task_filter: - allowed = {name.strip() for name in self.config.task_filter.split(",")} - tasks = [t for t in tasks if t["task_name"] in allowed] - print(f" Filtered to {len(tasks)} tasks: {sorted(allowed)}") - - # Skip tasks incompatible with the current backend (e.g., QEMU on Modal) - # plus any user-specified skip_tasks - skip = set(MODAL_INCOMPATIBLE_TASKS) if self.config.terminal_backend == "modal" else set() - if self.config.skip_tasks: - skip |= {name.strip() for name in self.config.skip_tasks.split(",")} - if skip: - before = len(tasks) - tasks = [t for t in tasks if t["task_name"] not in skip] - skipped = before - len(tasks) - if skipped > 0: - print(f" Skipped {skipped} incompatible tasks: {sorted(skip & {t['task_name'] for t in ds})}") - - self.all_eval_items = tasks - self.iter = 0 - - # Build category index for per-category metrics - self.category_index: Dict[str, List[int]] = defaultdict(list) - for i, task in enumerate(self.all_eval_items): - self.category_index[task.get("category", "unknown")].append(i) - - # Reward tracking for wandb logging - self.eval_metrics: List[Tuple[str, float]] = [] - - # Streaming JSONL writer -- saves each task's full conversation - # immediately on completion so data is preserved even on Ctrl+C. - # Timestamped filename so each run produces a unique file. 
- import datetime - log_dir = os.path.join(os.path.dirname(__file__), "logs") - os.makedirs(log_dir, exist_ok=True) - run_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - self._streaming_path = os.path.join(log_dir, f"samples_{run_ts}.jsonl") - self._streaming_file = open(self._streaming_path, "w", encoding="utf-8") - self._streaming_lock = __import__("threading").Lock() - print(f" Streaming results to: {self._streaming_path}") - - print(f"TB2 ready: {len(self.all_eval_items)} tasks across {len(self.category_index)} categories") - for cat, indices in sorted(self.category_index.items()): - print(f" {cat}: {len(indices)} tasks") - - def _save_result(self, result: Dict[str, Any]): - """Write a single task result to the streaming JSONL file immediately.""" - if not hasattr(self, "_streaming_file") or self._streaming_file.closed: - return - with self._streaming_lock: - self._streaming_file.write(json.dumps(result, ensure_ascii=False, default=str) + "\n") - self._streaming_file.flush() - - # ========================================================================= - # Training pipeline stubs -- NOT used in eval-only mode - # ========================================================================= - # These satisfy the abstract method requirements from HermesAgentBaseEnv. - # The evaluate subcommand calls setup() -> evaluate() directly, bypassing - # the training pipeline entirely. 
- - async def get_next_item(self): - """Return next item (stub -- not used in eval-only mode).""" - item = self.all_eval_items[self.iter % len(self.all_eval_items)] - self.iter += 1 - return item - - def format_prompt(self, item: Dict[str, Any]) -> str: - """Return the task's instruction as the user prompt.""" - return item["instruction"] - - async def compute_reward(self, item, result, ctx) -> float: - """Compute reward (stub -- actual verification is in rollout_and_score_eval).""" - return 0.0 - - async def collect_trajectories(self, item): - """Collect trajectories (stub -- not used in eval-only mode).""" - return None, [] - - async def score(self, rollout_group_data): - """Score rollouts (stub -- not used in eval-only mode).""" - return None - - # ========================================================================= - # Docker image resolution - # ========================================================================= - - def _resolve_task_image( - self, item: Dict[str, Any], task_name: str - ) -> Tuple[str, Optional[Path]]: - """ - Resolve the Docker image for a task, with fallback to Dockerfile. - - Strategy (mirrors Harbor's approach): - 1. If force_build=True, always build from Dockerfile in environment_tar - 2. If docker_image is available, use the pre-built Docker Hub image (fast) - 3. Otherwise, extract Dockerfile from environment_tar and build (slow) - - Returns: - (modal_image, temp_dir) -- modal_image is a Docker Hub name or a - Dockerfile path. temp_dir is set if we extracted files that need - cleanup later. 
- """ - docker_image = item.get("docker_image", "") - environment_tar = item.get("environment_tar", "") - - # Fast path: use pre-built Docker Hub image - if docker_image and not self.config.force_build: - logger.info("Task %s: using pre-built image %s", task_name, docker_image) - return docker_image, None - - # Slow path: extract Dockerfile from environment_tar and build - if environment_tar: - task_dir = Path(tempfile.mkdtemp(prefix=f"tb2-{task_name}-")) - _extract_base64_tar(environment_tar, task_dir) - dockerfile_path = task_dir / "Dockerfile" - if dockerfile_path.exists(): - logger.info( - "Task %s: building from Dockerfile (force_build=%s, docker_image=%s)", - task_name, self.config.force_build, bool(docker_image), - ) - return str(dockerfile_path), task_dir - - # Neither available -- fall back to Hub image if force_build was True - if docker_image: - logger.warning( - "Task %s: force_build=True but no environment_tar, " - "falling back to docker_image %s", task_name, docker_image, - ) - return docker_image, None - - return "", None - - # ========================================================================= - # Per-task evaluation -- agent loop + test verification - # ========================================================================= - - async def rollout_and_score_eval(self, eval_item: Dict[str, Any]) -> Dict: - """ - Evaluate a single TB2 task: run the agent loop, then verify with tests. - - This is the core evaluation method. For each task it: - 1. Resolves the Docker image and registers the Modal sandbox override - 2. Runs HermesAgentLoop with terminal + file tools - 3. Uploads the test suite into the sandbox - 4. Executes test.sh and checks the result - 5. 
Cleans up the sandbox and temp files - - Args: - eval_item: A single TB2 task dict from the dataset - - Returns: - Dict with 'passed' (bool), 'reward' (float), 'task_name' (str), - 'category' (str), and optional debug info - """ - task_name = eval_item.get("task_name", "unknown") - category = eval_item.get("category", "unknown") - task_id = str(uuid.uuid4()) - task_dir = None # Set if we extract a Dockerfile (needs cleanup) - - from tqdm import tqdm - tqdm.write(f" [START] {task_name} (task_id={task_id[:8]})") - task_start = time.time() - - try: - # --- 1. Resolve Docker image --- - modal_image, task_dir = self._resolve_task_image(eval_item, task_name) - if not modal_image: - logger.error("Task %s: no docker_image or environment_tar, skipping", task_name) - return { - "passed": False, "reward": 0.0, - "task_name": task_name, "category": category, - "error": "no_image", - } - - # --- 2. Register per-task image override --- - # Set both modal_image and docker_image so the task image is used - # regardless of which backend is configured. - register_task_env_overrides(task_id, { - "modal_image": modal_image, - "docker_image": modal_image, - "cwd": "/app", - }) - logger.info( - "Task %s: registered image override for task_id %s", - task_name, task_id[:8], - ) - - # --- 3. Resolve tools and build messages --- - tools, valid_names = self._resolve_tools_for_group() - - messages: List[Dict[str, Any]] = [] - if self.config.system_prompt: - messages.append({"role": "system", "content": self.config.system_prompt}) - messages.append({"role": "user", "content": self.format_prompt(eval_item)}) - - # --- 4. Run agent loop --- - # Use ManagedServer (Phase 2) for vLLM/SGLang backends to get - # token-level tracking via /generate. Falls back to direct - # ServerManager (Phase 1) for OpenAI endpoints. 
- if self._use_managed_server(): - async with self.server.managed_server( - tokenizer=self.tokenizer, - preserve_think_blocks=bool(self.config.thinking_mode), - ) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=self.config.agent_temperature, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - else: - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=self.config.agent_temperature, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - - # --- 5. Verify -- run test suite in the agent's sandbox --- - # Skip verification if the agent produced no meaningful output - only_system_and_user = all( - msg.get("role") in {"system", "user"} for msg in result.messages - ) - if result.turns_used == 0 or only_system_and_user: - logger.warning( - "Task %s: agent produced no output (turns=%d). Reward=0.", - task_name, result.turns_used, - ) - reward = 0.0 - else: - # Run tests in a thread so the blocking ctx.terminal() calls - # don't freeze the entire event loop (which would stall all - # other tasks, tqdm updates, and timeout timers). 
- ctx = ToolContext(task_id) - try: - loop = asyncio.get_running_loop() - reward = await loop.run_in_executor( - None, # default thread pool - self._run_tests, eval_item, ctx, task_name, - ) - except Exception as e: - logger.error("Task %s: test verification failed: %s", task_name, e) - reward = 0.0 - finally: - ctx.cleanup() - - passed = reward == 1.0 - status = "PASS" if passed else "FAIL" - elapsed = time.time() - task_start - tqdm.write(f" [{status}] {task_name} (turns={result.turns_used}, {elapsed:.0f}s)") - logger.info( - "Task %s: reward=%.1f, turns=%d, finished=%s", - task_name, reward, result.turns_used, result.finished_naturally, - ) - - out = { - "passed": passed, - "reward": reward, - "task_name": task_name, - "category": category, - "turns_used": result.turns_used, - "finished_naturally": result.finished_naturally, - "messages": result.messages, - } - self._save_result(out) - return out - - except Exception as e: - elapsed = time.time() - task_start - logger.error("Task %s: rollout failed: %s", task_name, e, exc_info=True) - tqdm.write(f" [ERROR] {task_name}: {e} ({elapsed:.0f}s)") - out = { - "passed": False, "reward": 0.0, - "task_name": task_name, "category": category, - "error": str(e), - } - self._save_result(out) - return out - - finally: - # --- Cleanup: clear overrides, sandbox, and temp files --- - clear_task_env_overrides(task_id) - try: - cleanup_vm(task_id) - except Exception as e: - logger.debug("VM cleanup for %s: %s", task_id[:8], e) - if task_dir and task_dir.exists(): - shutil.rmtree(task_dir, ignore_errors=True) - - def _run_tests( - self, item: Dict[str, Any], ctx: ToolContext, task_name: str - ) -> float: - """ - Upload and execute the test suite in the agent's sandbox, then - download the verifier output locally to read the reward. - - Follows Harbor's verification pattern: - 1. Upload tests/ directory into the sandbox - 2. Execute test.sh inside the sandbox - 3. Download /logs/verifier/ directory to a local temp dir - 4. 
Read reward.txt locally with native Python I/O - - Downloading locally avoids issues with the file_read tool on - the Modal VM and matches how Harbor handles verification. - - TB2 test scripts (test.sh) typically: - 1. Install pytest via uv/pip - 2. Run pytest against the test files in /tests/ - 3. Write results to /logs/verifier/reward.txt - - Args: - item: The TB2 task dict (contains tests_tar, test_sh) - ctx: ToolContext scoped to this task's sandbox - task_name: For logging - - Returns: - 1.0 if tests pass, 0.0 otherwise - """ - tests_tar = item.get("tests_tar", "") - test_sh = item.get("test_sh", "") - - if not test_sh: - logger.warning("Task %s: no test_sh content, reward=0", task_name) - return 0.0 - - # Create required directories in the sandbox - ctx.terminal("mkdir -p /tests /logs/verifier") - - # Upload test files into the sandbox (binary-safe via base64) - if tests_tar: - tests_temp = Path(tempfile.mkdtemp(prefix=f"tb2-tests-{task_name}-")) - try: - _extract_base64_tar(tests_tar, tests_temp) - ctx.upload_dir(str(tests_temp), "/tests") - except Exception as e: - logger.warning("Task %s: failed to upload test files: %s", task_name, e) - finally: - shutil.rmtree(tests_temp, ignore_errors=True) - - # Write the test runner script (test.sh) - ctx.write_file("/tests/test.sh", test_sh) - ctx.terminal("chmod +x /tests/test.sh") - - # Execute the test suite - logger.info( - "Task %s: running test suite (timeout=%ds)", - task_name, self.config.test_timeout, - ) - test_result = ctx.terminal( - "bash /tests/test.sh", - timeout=self.config.test_timeout, - ) - - exit_code = test_result.get("exit_code", -1) - output = test_result.get("output", "") - - # Download the verifier output directory locally, then read reward.txt - # with native Python I/O. This avoids issues with file_read on the - # Modal VM and matches Harbor's verification pattern. 
- reward = 0.0 - local_verifier_dir = Path(tempfile.mkdtemp(prefix=f"tb2-verifier-{task_name}-")) - try: - ctx.download_dir("/logs/verifier", str(local_verifier_dir)) - - reward_file = local_verifier_dir / "reward.txt" - if reward_file.exists() and reward_file.stat().st_size > 0: - content = reward_file.read_text().strip() - if content == "1": - reward = 1.0 - elif content == "0": - reward = 0.0 - else: - # Unexpected content -- try parsing as float - try: - reward = float(content) - except (ValueError, TypeError): - logger.warning( - "Task %s: reward.txt content unexpected (%r), " - "falling back to exit_code=%d", - task_name, content, exit_code, - ) - reward = 1.0 if exit_code == 0 else 0.0 - else: - # reward.txt not written -- fall back to exit code - logger.warning( - "Task %s: reward.txt not found after download, " - "falling back to exit_code=%d", - task_name, exit_code, - ) - reward = 1.0 if exit_code == 0 else 0.0 - except Exception as e: - logger.warning( - "Task %s: failed to download verifier dir: %s, " - "falling back to exit_code=%d", - task_name, e, exit_code, - ) - reward = 1.0 if exit_code == 0 else 0.0 - finally: - shutil.rmtree(local_verifier_dir, ignore_errors=True) - - # Log test output for debugging failures - if reward == 0.0: - output_preview = output[-500:] if output else "(no output)" - logger.info( - "Task %s: FAIL (exit_code=%d)\n%s", - task_name, exit_code, output_preview, - ) - - return reward - - # ========================================================================= - # Evaluate -- main entry point for the eval subcommand - # ========================================================================= - - async def _eval_with_timeout(self, item: Dict[str, Any]) -> Dict: - """ - Wrap rollout_and_score_eval with a per-task wall-clock timeout. - - If the task exceeds task_timeout seconds, it's automatically scored - as FAIL. This prevents any single task from hanging indefinitely. 
- """ - task_name = item.get("task_name", "unknown") - category = item.get("category", "unknown") - try: - return await asyncio.wait_for( - self.rollout_and_score_eval(item), - timeout=self.config.task_timeout, - ) - except asyncio.TimeoutError: - from tqdm import tqdm - elapsed = self.config.task_timeout - tqdm.write(f" [TIMEOUT] {task_name} (exceeded {elapsed}s wall-clock limit)") - logger.error("Task %s: wall-clock timeout after %ds", task_name, elapsed) - out = { - "passed": False, "reward": 0.0, - "task_name": task_name, "category": category, - "error": f"timeout ({elapsed}s)", - } - self._save_result(out) - return out - - async def evaluate(self, *args, **kwargs) -> None: - """ - Run Terminal-Bench 2.0 evaluation over all tasks. - - This is the main entry point when invoked via: - python environments/terminalbench2_env.py evaluate - - Runs all tasks through rollout_and_score_eval() via asyncio.gather() - (same pattern as GPQA and other Atropos eval envs). Each task is - wrapped with a wall-clock timeout so hung tasks auto-fail. - - Suppresses noisy Modal/terminal output (HERMES_QUIET) so the tqdm - bar stays visible. - """ - start_time = time.time() - - # Route all logging through tqdm.write() so the progress bar stays - # pinned at the bottom while log lines scroll above it. 
- from tqdm import tqdm - - class _TqdmHandler(logging.Handler): - def emit(self, record): - try: - tqdm.write(self.format(record)) - except Exception: - self.handleError(record) - - handler = _TqdmHandler() - handler.setFormatter(logging.Formatter( - "%(asctime)s [%(name)s] %(levelname)s: %(message)s", - datefmt="%H:%M:%S", - )) - root = logging.getLogger() - root.handlers = [handler] # Replace any existing handlers - root.setLevel(logging.INFO) - - # Silence noisy third-party loggers that flood the output - logging.getLogger("httpx").setLevel(logging.WARNING) # Every HTTP request - logging.getLogger("openai").setLevel(logging.WARNING) # OpenAI client retries - logging.getLogger("rex-deploy").setLevel(logging.WARNING) # Swerex deployment - logging.getLogger("rex_image_builder").setLevel(logging.WARNING) # Image builds - - print(f"\n{'='*60}") - print("Starting Terminal-Bench 2.0 Evaluation") - print(f"{'='*60}") - print(f" Dataset: {self.config.dataset_name}") - print(f" Total tasks: {len(self.all_eval_items)}") - print(f" Max agent turns: {self.config.max_agent_turns}") - print(f" Task timeout: {self.config.task_timeout}s") - print(f" Terminal backend: {self.config.terminal_backend}") - print(f" Tool thread pool: {self.config.tool_pool_size}") - print(f" Terminal timeout: {self.config.terminal_timeout}s/cmd") - print(f" Terminal lifetime: {self.config.terminal_lifetime}s (auto: task_timeout + 120)") - print(f" Max concurrent tasks: {self.config.max_concurrent_tasks}") - print(f"{'='*60}\n") - - # Semaphore to limit concurrent Modal sandbox creations. - # Without this, all 86 tasks fire simultaneously, each creating a Modal - # sandbox via asyncio.run() inside a thread pool worker. Modal's blocking - # calls (App.lookup, etc.) deadlock when too many are created at once. 
- semaphore = asyncio.Semaphore(self.config.max_concurrent_tasks) - - async def _eval_with_semaphore(item): - async with semaphore: - return await self._eval_with_timeout(item) - - # Fire all tasks with wall-clock timeout, track live accuracy on the bar - total_tasks = len(self.all_eval_items) - eval_tasks = [ - asyncio.ensure_future(_eval_with_semaphore(item)) - for item in self.all_eval_items - ] - - results = [] - passed_count = 0 - pbar = tqdm(total=total_tasks, desc="Evaluating TB2", dynamic_ncols=True) - try: - for coro in asyncio.as_completed(eval_tasks): - result = await coro - results.append(result) - if result and result.get("passed"): - passed_count += 1 - done = len(results) - pct = (passed_count / done * 100) if done else 0 - pbar.set_postfix_str(f"pass={passed_count}/{done} ({pct:.1f}%)") - pbar.update(1) - except (KeyboardInterrupt, asyncio.CancelledError): - pbar.close() - print(f"\n\nInterrupted! Cleaning up {len(eval_tasks)} tasks...") - # Cancel all pending tasks - for task in eval_tasks: - task.cancel() - # Let cancellations propagate (finally blocks run cleanup_vm) - await asyncio.gather(*eval_tasks, return_exceptions=True) - # Belt-and-suspenders: clean up any remaining sandboxes - from tools.terminal_tool import cleanup_all_environments - cleanup_all_environments() - print("All sandboxes cleaned up.") - return - finally: - pbar.close() - - end_time = time.time() - - # Filter out None results (shouldn't happen, but be safe) - valid_results = [r for r in results if r is not None] - - if not valid_results: - print("Warning: No valid evaluation results obtained") - return - - # ---- Compute metrics ---- - total = len(valid_results) - passed = sum(1 for r in valid_results if r.get("passed")) - overall_pass_rate = passed / total if total > 0 else 0.0 - - # Per-category breakdown - cat_results: Dict[str, List[Dict]] = defaultdict(list) - for r in valid_results: - cat_results[r.get("category", "unknown")].append(r) - - # Build metrics dict - 
eval_metrics = { - "eval/pass_rate": overall_pass_rate, - "eval/total_tasks": total, - "eval/passed_tasks": passed, - "eval/evaluation_time_seconds": end_time - start_time, - } - - # Per-category metrics - for category, cat_items in sorted(cat_results.items()): - cat_passed = sum(1 for r in cat_items if r.get("passed")) - cat_total = len(cat_items) - cat_pass_rate = cat_passed / cat_total if cat_total > 0 else 0.0 - cat_key = category.replace(" ", "_").replace("-", "_").lower() - eval_metrics[f"eval/pass_rate_{cat_key}"] = cat_pass_rate - - # Store metrics for wandb_log - self.eval_metrics = list(eval_metrics.items()) - - # ---- Print summary ---- - print(f"\n{'='*60}") - print("Terminal-Bench 2.0 Evaluation Results") - print(f"{'='*60}") - print(f"Overall Pass Rate: {overall_pass_rate:.4f} ({passed}/{total})") - print(f"Evaluation Time: {end_time - start_time:.1f} seconds") - - print("\nCategory Breakdown:") - for category, cat_items in sorted(cat_results.items()): - cat_passed = sum(1 for r in cat_items if r.get("passed")) - cat_total = len(cat_items) - cat_rate = cat_passed / cat_total if cat_total > 0 else 0.0 - print(f" {category}: {cat_rate:.1%} ({cat_passed}/{cat_total})") - - # Print individual task results - print("\nTask Results:") - for r in sorted(valid_results, key=lambda x: x.get("task_name", "")): - status = "PASS" if r.get("passed") else "FAIL" - turns = r.get("turns_used", "?") - error = r.get("error", "") - extra = f" (error: {error})" if error else "" - print(f" [{status}] {r['task_name']} (turns={turns}){extra}") - - print(f"{'='*60}\n") - - # Build sample records for evaluate_log (includes full conversations) - samples = [ - { - "task_name": r.get("task_name"), - "category": r.get("category"), - "passed": r.get("passed"), - "reward": r.get("reward"), - "turns_used": r.get("turns_used"), - "error": r.get("error"), - "messages": r.get("messages"), - } - for r in valid_results - ] - - # Log evaluation results - try: - await self.evaluate_log( - 
metrics=eval_metrics, - samples=samples, - start_time=start_time, - end_time=end_time, - generation_parameters={ - "temperature": self.config.agent_temperature, - "max_tokens": self.config.max_token_length, - "max_agent_turns": self.config.max_agent_turns, - "terminal_backend": self.config.terminal_backend, - }, - ) - except Exception as e: - print(f"Error logging evaluation results: {e}") - - # Close streaming file - if hasattr(self, "_streaming_file") and not self._streaming_file.closed: - self._streaming_file.close() - print(f" Live results saved to: {self._streaming_path}") - - # Kill all remaining sandboxes. Timed-out tasks leave orphaned thread - # pool workers still executing commands -- cleanup_all stops them. - from tools.terminal_tool import cleanup_all_environments - print("\nCleaning up all sandboxes...") - cleanup_all_environments() - - # Shut down the tool thread pool so orphaned workers from timed-out - # tasks are killed immediately instead of retrying against dead - # sandboxes and spamming the console with TimeoutError warnings. 
- from environments.agent_loop import _tool_executor - _tool_executor.shutdown(wait=False, cancel_futures=True) - print("Done.") - - # ========================================================================= - # Wandb logging - # ========================================================================= - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None): - """Log TB2-specific metrics to wandb.""" - if wandb_metrics is None: - wandb_metrics = {} - - # Add stored eval metrics - for metric_name, metric_value in self.eval_metrics: - wandb_metrics[metric_name] = metric_value - self.eval_metrics = [] - - await super().wandb_log(wandb_metrics) - - -if __name__ == "__main__": - TerminalBench2EvalEnv.cli() diff --git a/environments/benchmarks/yc_bench/README.md b/environments/benchmarks/yc_bench/README.md deleted file mode 100644 index 7a8aba7874d7..000000000000 --- a/environments/benchmarks/yc_bench/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# YC-Bench: Long-Horizon Agent Benchmark - -[YC-Bench](https://github.com/collinear-ai/yc-bench) by [Collinear AI](https://collinear.ai/) is a deterministic, long-horizon benchmark that tests LLM agents' ability to act as a tech startup CEO. The agent manages a simulated company over 1-3 years, making compounding decisions about resource allocation, cash flow, task management, and prestige specialisation across 4 skill domains. - -Unlike TerminalBench2 (which evaluates per-task coding ability with binary pass/fail), YC-Bench measures **long-term strategic coherence** โ€” whether an agent can maintain consistent strategy, manage compounding consequences, and adapt plans over hundreds of turns. - -## Setup - -```bash -# Install yc-bench (optional dependency) -pip install "hermes-agent[yc-bench]" - -# Or install from source -git clone https://github.com/collinear-ai/yc-bench -cd yc-bench && pip install -e . 
- -# Verify -yc-bench --help -``` - -## Running - -```bash -# From the repo root: -bash environments/benchmarks/yc_bench/run_eval.sh - -# Or directly: -python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ - --config environments/benchmarks/yc_bench/default.yaml - -# Override model: -bash environments/benchmarks/yc_bench/run_eval.sh \ - --openai.model_name anthropic/claude-opus-4-20250514 - -# Quick single-preset test: -bash environments/benchmarks/yc_bench/run_eval.sh \ - --env.presets '["fast_test"]' --env.seeds '[1]' -``` - -## How It Works - -### Architecture - -``` -HermesAgentLoop (our agent) - -> terminal tool -> subprocess("yc-bench company status") -> JSON output - -> terminal tool -> subprocess("yc-bench task accept --task-id X") -> JSON - -> terminal tool -> subprocess("yc-bench sim resume") -> JSON (advance time) - -> ... (100-500 turns per run) -``` - -The environment initialises the simulation via `yc-bench sim init` (NOT `yc-bench run`, which would start yc-bench's own built-in agent loop). Our `HermesAgentLoop` then drives all interaction through CLI commands. 
- -### Simulation Mechanics - -- **4 skill domains**: research, inference, data_environment, training -- **Prestige system** (1.0-10.0): Gates access to higher-paying tasks -- **Employee management**: Junior/Mid/Senior with domain-specific skill rates -- **Throughput splitting**: `effective_rate = base_rate / N` active tasks per employee -- **Financial pressure**: Monthly payroll, bankruptcy = game over -- **Deterministic**: SHA256-based RNG โ€” same seed + preset = same world - -### Difficulty Presets - -| Preset | Employees | Tasks | Focus | -|-----------|-----------|-------|-------| -| tutorial | 3 | 50 | Basic loop mechanics | -| easy | 5 | 100 | Throughput awareness | -| **medium**| 5 | 150 | Prestige climbing + domain specialisation | -| **hard** | 7 | 200 | Precise ETA reasoning | -| nightmare | 8 | 300 | Sustained perfection under payroll pressure | -| fast_test | (varies) | (varies) | Quick validation (~50 turns) | - -Default eval runs **fast_test + medium + hard** ร— 3 seeds = 9 runs. - -### Scoring - -``` -composite = 0.5 ร— survival + 0.5 ร— normalised_funds -``` - -- **Survival** (binary): Did the company avoid bankruptcy? -- **Normalised funds** (0.0-1.0): Log-scale relative to initial $250K capital - -## Configuration - -Key fields in `default.yaml`: - -| Field | Default | Description | -|-------|---------|-------------| -| `presets` | `["fast_test", "medium", "hard"]` | Which presets to evaluate | -| `seeds` | `[1, 2, 3]` | RNG seeds per preset | -| `max_agent_turns` | 200 | Max LLM calls per run | -| `run_timeout` | 3600 | Wall-clock timeout per run (seconds) | -| `survival_weight` | 0.5 | Weight of survival in composite score | -| `funds_weight` | 0.5 | Weight of normalised funds in composite | -| `horizon_years` | null | Override horizon (null = auto from preset) | - -## Cost & Time Estimates - -Each run is 100-500 LLM turns. Approximate costs per run at typical API rates: - -| Preset | Turns | Time | Est. 
Cost | -|--------|-------|------|-----------| -| fast_test | ~50 | 5-10 min | $1-5 | -| medium | ~200 | 20-40 min | $5-15 | -| hard | ~300 | 30-60 min | $10-25 | - -Full default eval (9 runs): ~3-6 hours, $50-200 depending on model. - -## References - -- [collinear-ai/yc-bench](https://github.com/collinear-ai/yc-bench) โ€” Official repository -- [Collinear AI](https://collinear.ai/) โ€” Company behind yc-bench -- [TerminalBench2](../terminalbench_2/) โ€” Per-task coding benchmark (complementary) diff --git a/environments/benchmarks/yc_bench/__init__.py b/environments/benchmarks/yc_bench/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/environments/benchmarks/yc_bench/default.yaml b/environments/benchmarks/yc_bench/default.yaml deleted file mode 100644 index 4396c00ab948..000000000000 --- a/environments/benchmarks/yc_bench/default.yaml +++ /dev/null @@ -1,43 +0,0 @@ -# YC-Bench Evaluation -- Default Configuration -# -# Long-horizon agent benchmark: agent plays CEO of an AI startup over -# a simulated 1-3 year run, interacting via yc-bench CLI subcommands. 
-# -# Requires: pip install "hermes-agent[yc-bench]" -# -# Usage: -# python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ -# --config environments/benchmarks/yc_bench/default.yaml -# -# # Override model: -# python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ -# --config environments/benchmarks/yc_bench/default.yaml \ -# --openai.model_name anthropic/claude-opus-4-20250514 - -env: - enabled_toolsets: ["terminal"] - max_agent_turns: 200 - max_token_length: 32000 - agent_temperature: 0.0 - terminal_backend: "local" - terminal_timeout: 60 - presets: ["fast_test", "medium", "hard"] - seeds: [1, 2, 3] - run_timeout: 3600 # 60 min wall-clock per run, auto-FAIL if exceeded - survival_weight: 0.5 # weight of binary survival in composite score - funds_weight: 0.5 # weight of normalised final funds in composite score - db_dir: "/tmp/yc_bench_dbs" - company_name: "BenchCo" - start_date: "01/01/2025" # MM/DD/YYYY (yc-bench convention) - tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B" - use_wandb: true - wandb_name: "yc-bench" - ensure_scores_are_not_same: false - data_dir_to_save_evals: "environments/benchmarks/evals/yc-bench" - -openai: - base_url: "https://openrouter.ai/api/v1" - model_name: "anthropic/claude-sonnet-4.6" - server_type: "openai" - health_check: false - # api_key loaded from OPENROUTER_API_KEY in .env diff --git a/environments/benchmarks/yc_bench/run_eval.sh b/environments/benchmarks/yc_bench/run_eval.sh deleted file mode 100755 index 0d793f53d54d..000000000000 --- a/environments/benchmarks/yc_bench/run_eval.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -# YC-Bench Evaluation -# -# Requires: pip install "hermes-agent[yc-bench]" -# -# Run from repo root: -# bash environments/benchmarks/yc_bench/run_eval.sh -# -# Override model: -# bash environments/benchmarks/yc_bench/run_eval.sh \ -# --openai.model_name anthropic/claude-opus-4-20250514 -# -# Run a single preset: -# bash environments/benchmarks/yc_bench/run_eval.sh \ -# 
--env.presets '["fast_test"]' --env.seeds '[1]' - -set -euo pipefail - -mkdir -p logs evals/yc-bench -LOG_FILE="logs/yc_bench_$(date +%Y%m%d_%H%M%S).log" - -echo "YC-Bench Evaluation" -echo "Log: $LOG_FILE" -echo "" - -PYTHONUNBUFFERED=1 LOGLEVEL="${LOGLEVEL:-INFO}" \ - python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ - --config environments/benchmarks/yc_bench/default.yaml \ - "$@" \ - 2>&1 | tee "$LOG_FILE" - -echo "" -echo "Log saved to: $LOG_FILE" diff --git a/environments/benchmarks/yc_bench/yc_bench_env.py b/environments/benchmarks/yc_bench/yc_bench_env.py deleted file mode 100644 index 6e7be2c899bc..000000000000 --- a/environments/benchmarks/yc_bench/yc_bench_env.py +++ /dev/null @@ -1,848 +0,0 @@ -""" -YCBenchEvalEnv -- YC-Bench Long-Horizon Agent Benchmark Environment - -Evaluates agentic LLMs on YC-Bench: a deterministic, long-horizon benchmark -where the agent acts as CEO of an AI startup over a simulated 1-3 year run. -The agent manages cash flow, employees, tasks, and prestige across 4 domains, -interacting exclusively via CLI subprocess calls against a SQLite-backed -discrete-event simulation. - -Unlike TerminalBench2 (per-task binary pass/fail), YC-Bench measures sustained -multi-turn strategic coherence -- whether an agent can manage compounding -decisions over hundreds of turns without going bankrupt. - -This is an eval-only environment. Run via: - - python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ - --config environments/benchmarks/yc_bench/default.yaml - -The evaluate flow: - 1. setup() -- Verifies yc-bench installed, builds eval matrix (preset x seed) - 2. evaluate() -- Iterates over all runs sequentially through: - a. rollout_and_score_eval() -- Per-run agent loop - - Initialises a fresh yc-bench simulation via `sim init` (NOT `run`) - - Runs HermesAgentLoop with terminal tool only - - Reads final SQLite DB to extract score - - Returns survival (0/1) + normalised funds score - b. 
Aggregates per-preset and overall metrics - c. Logs results via evaluate_log() and wandb - -Key features: - - CLI-only interface: agent calls yc-bench subcommands via terminal tool - - Deterministic: same seed + preset = same world (SHA256-based RNG) - - Multi-dimensional scoring: survival + normalised final funds - - Per-preset difficulty breakdown in results - - Isolated SQLite DB per run (no cross-run state leakage) - -Requires: pip install hermes-agent[yc-bench] -""" - -import asyncio -import datetime -import json -import logging -import math -import os -import sqlite3 -import subprocess -import sys -import threading -import time -import uuid -from collections import defaultdict -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -_repo_root = Path(__file__).resolve().parent.parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from pydantic import Field - -from atroposlib.envs.base import EvalHandlingEnum -from atroposlib.envs.server_handling.server_manager import APIServerConfig - -from environments.agent_loop import HermesAgentLoop -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig - -logger = logging.getLogger(__name__) - -# ============================================================================= -# System prompt -# ============================================================================= - -YC_BENCH_SYSTEM_PROMPT = """\ -You are the autonomous CEO of an early-stage AI startup in a deterministic -business simulation. You manage the company exclusively through the `yc-bench` -CLI tool. Your primary goal is to **survive** until the simulation horizon ends -without going bankrupt, while **maximising final funds**. - -## Simulation Mechanics - -- **Funds**: You start with $250,000 seed capital. Revenue comes from completing - tasks. Rewards scale with your prestige: `base ร— (1 + scale ร— (prestige โˆ’ 1))`. 
-- **Domains**: There are 4 skill domains: **research**, **inference**, - **data_environment**, and **training**. Each has its own prestige level - (1.0-10.0). Higher prestige unlocks better-paying tasks. -- **Employees**: You have employees (Junior/Mid/Senior) with domain-specific - skill rates. **Throughput splits**: `effective_rate = base_rate / N` where N - is the number of active tasks assigned to that employee. Focus beats breadth. -- **Payroll**: Deducted automatically on the first business day of each month. - Running out of funds = bankruptcy = game over. -- **Time**: The simulation runs on business days (Mon-Fri), 09:00-18:00. - Time only advances when you call `yc-bench sim resume`. - -## Task Lifecycle - -1. Browse market tasks with `market browse` -2. Accept a task with `task accept` (this sets its deadline) -3. Assign employees with `task assign` -4. Dispatch with `task dispatch` to start work -5. Call `sim resume` to advance time and let employees make progress -6. Tasks complete when all domain requirements are fulfilled - -**Penalties for failure vary by difficulty preset.** Completing a task on time -earns full reward + prestige gain. Missing a deadline or cancelling a task -incurs prestige penalties -- cancelling is always more costly than letting a -task fail, so cancel only as a last resort. 
- -## CLI Commands - -### Observe -- `yc-bench company status` -- funds, prestige, runway -- `yc-bench employee list` -- skills, salary, active tasks -- `yc-bench market browse [--domain D] [--required-prestige-lte N]` -- available tasks -- `yc-bench task list [--status active|planned]` -- your tasks -- `yc-bench task inspect --task-id UUID` -- progress, deadline, assignments -- `yc-bench finance ledger [--category monthly_payroll|task_reward]` -- transaction history -- `yc-bench report monthly` -- monthly P&L - -### Act -- `yc-bench task accept --task-id UUID` -- accept from market -- `yc-bench task assign --task-id UUID --employee-id UUID` -- assign employee -- `yc-bench task dispatch --task-id UUID` -- start work (needs >=1 assignment) -- `yc-bench task cancel --task-id UUID --reason "text"` -- cancel (prestige penalty) -- `yc-bench sim resume` -- advance simulation clock - -### Memory (persists across context truncation) -- `yc-bench scratchpad read` -- read your persistent notes -- `yc-bench scratchpad write --content "text"` -- overwrite notes -- `yc-bench scratchpad append --content "text"` -- append to notes -- `yc-bench scratchpad clear` -- clear notes - -## Strategy Guidelines - -1. **Specialise in 2-3 domains** to climb the prestige ladder faster and unlock - high-reward tasks. Don't spread thin across all 4 domains early on. -2. **Focus employees** -- assigning one employee to many tasks halves their - throughput per additional task. Keep assignments concentrated. -3. **Use the scratchpad** to track your strategy, upcoming deadlines, and - employee assignments. This persists even if conversation context is truncated. -4. **Monitor runway** -- always know how many months of payroll you can cover. - Accept high-reward tasks before payroll dates. -5. **Don't over-accept** -- taking too many tasks and missing deadlines cascades - into prestige loss, locking you out of profitable contracts. -6. 
Use `finance ledger` and `report monthly` to track revenue trends. - -## Your Turn - -Each turn: -1. Call `yc-bench company status` and `yc-bench task list` to orient yourself. -2. Check for completed tasks and pending deadlines. -3. Browse market for profitable tasks within your prestige level. -4. Accept, assign, and dispatch tasks strategically. -5. Call `yc-bench sim resume` to advance time. -6. Repeat until the simulation ends. - -Think step by step before acting.""" - -# Starting funds in cents ($250,000) -INITIAL_FUNDS_CENTS = 25_000_000 - -# Default horizon per preset (years) -_PRESET_HORIZONS = { - "tutorial": 1, - "easy": 1, - "medium": 1, - "hard": 1, - "nightmare": 1, - "fast_test": 1, - "default": 3, - "high_reward": 1, -} - - -# ============================================================================= -# Configuration -# ============================================================================= - -class YCBenchEvalConfig(HermesAgentEnvConfig): - """ - Configuration for the YC-Bench evaluation environment. - - Extends HermesAgentEnvConfig with YC-Bench-specific settings for - preset selection, seed control, scoring, and simulation parameters. - """ - - presets: List[str] = Field( - default=["fast_test", "medium", "hard"], - description="YC-Bench preset names to evaluate.", - ) - seeds: List[int] = Field( - default=[1, 2, 3], - description="Random seeds -- each preset x seed = one run.", - ) - run_timeout: int = Field( - default=3600, - description="Maximum wall-clock seconds per run. 
Default 60 minutes.", - ) - survival_weight: float = Field( - default=0.5, - description="Weight of survival (0/1) in composite score.", - ) - funds_weight: float = Field( - default=0.5, - description="Weight of normalised final funds in composite score.", - ) - db_dir: str = Field( - default="/tmp/yc_bench_dbs", - description="Directory for per-run SQLite databases.", - ) - horizon_years: Optional[int] = Field( - default=None, - description=( - "Simulation horizon in years. If None (default), inferred from " - "preset name (1 year for most, 3 for 'default')." - ), - ) - company_name: str = Field( - default="BenchCo", - description="Name of the simulated company.", - ) - start_date: str = Field( - default="01/01/2025", - description="Simulation start date in MM/DD/YYYY format (yc-bench convention).", - ) - - -# ============================================================================= -# Scoring helpers -# ============================================================================= - -def _read_final_score(db_path: str) -> Dict[str, Any]: - """ - Read final game state from a YC-Bench SQLite database. - - Returns dict with final_funds_cents (int), survived (bool), - terminal_reason (str). - - Note: yc-bench table names are plural -- 'companies' not 'company', - 'sim_events' not 'simulation_log'. 
- """ - if not os.path.exists(db_path): - logger.warning("DB not found at %s", db_path) - return { - "final_funds_cents": 0, - "survived": False, - "terminal_reason": "db_missing", - } - - conn = None - try: - conn = sqlite3.connect(db_path) - cur = conn.cursor() - - # Read final funds from the 'companies' table - cur.execute("SELECT funds_cents FROM companies LIMIT 1") - row = cur.fetchone() - funds = row[0] if row else 0 - - # Determine terminal reason from 'sim_events' table - terminal_reason = "unknown" - try: - cur.execute( - "SELECT event_type FROM sim_events " - "WHERE event_type IN ('bankruptcy', 'horizon_end') " - "ORDER BY scheduled_at DESC LIMIT 1" - ) - event_row = cur.fetchone() - if event_row: - terminal_reason = event_row[0] - except sqlite3.OperationalError: - # Table may not exist if simulation didn't progress - pass - - survived = funds >= 0 and terminal_reason != "bankruptcy" - return { - "final_funds_cents": funds, - "survived": survived, - "terminal_reason": terminal_reason, - } - - except Exception as e: - logger.error("Failed to read DB %s: %s", db_path, e) - return { - "final_funds_cents": 0, - "survived": False, - "terminal_reason": f"db_error: {e}", - } - finally: - if conn: - conn.close() - - -def _compute_composite_score( - final_funds_cents: int, - survived: bool, - survival_weight: float = 0.5, - funds_weight: float = 0.5, - initial_funds_cents: int = INITIAL_FUNDS_CENTS, -) -> float: - """ - Compute composite score from survival and final funds. 
- - Score = survival_weight * survival_score - + funds_weight * normalised_funds_score - - Normalised funds uses log-scale relative to initial capital: - - funds <= 0: 0.0 - - funds == initial: ~0.15 - - funds == 10x: ~0.52 - - funds == 100x: 1.0 - """ - survival_score = 1.0 if survived else 0.0 - - if final_funds_cents <= 0: - funds_score = 0.0 - else: - max_ratio = 100.0 - ratio = final_funds_cents / max(initial_funds_cents, 1) - funds_score = min(math.log1p(ratio) / math.log1p(max_ratio), 1.0) - - return survival_weight * survival_score + funds_weight * funds_score - - -# ============================================================================= -# Main Environment -# ============================================================================= - -class YCBenchEvalEnv(HermesAgentBaseEnv): - """ - YC-Bench long-horizon agent benchmark environment (eval-only). - - Each eval item is a (preset, seed) pair. The environment initialises the - simulation via ``yc-bench sim init`` (NOT ``yc-bench run`` which would start - a competing built-in agent loop). The HermesAgentLoop then drives the - interaction by calling individual yc-bench CLI commands via the terminal tool. - - After the agent loop ends, the SQLite DB is read to extract the final score. 
- - Scoring: - composite = 0.5 * survival + 0.5 * normalised_funds - """ - - name = "yc-bench" - env_config_cls = YCBenchEvalConfig - - @classmethod - def config_init(cls) -> Tuple[YCBenchEvalConfig, List[APIServerConfig]]: - env_config = YCBenchEvalConfig( - enabled_toolsets=["terminal"], - disabled_toolsets=None, - distribution=None, - max_agent_turns=200, - max_token_length=32000, - agent_temperature=0.0, - system_prompt=YC_BENCH_SYSTEM_PROMPT, - terminal_backend="local", - terminal_timeout=60, - presets=["fast_test", "medium", "hard"], - seeds=[1, 2, 3], - run_timeout=3600, - survival_weight=0.5, - funds_weight=0.5, - db_dir="/tmp/yc_bench_dbs", - eval_handling=EvalHandlingEnum.STOP_TRAIN, - group_size=1, - steps_per_eval=1, - total_steps=1, - tokenizer_name="NousResearch/Hermes-3-Llama-3.1-8B", - use_wandb=True, - wandb_name="yc-bench", - ensure_scores_are_not_same=False, - ) - - server_configs = [ - APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name="anthropic/claude-sonnet-4.6", - server_type="openai", - api_key=os.getenv("OPENROUTER_API_KEY", ""), - health_check=False, - ) - ] - - return env_config, server_configs - - # ========================================================================= - # Setup - # ========================================================================= - - async def setup(self): - """Verify yc-bench is installed and build the eval matrix.""" - # Verify yc-bench CLI is available - try: - result = subprocess.run( - ["yc-bench", "--help"], capture_output=True, text=True, timeout=10 - ) - if result.returncode != 0: - raise FileNotFoundError - except (FileNotFoundError, subprocess.TimeoutExpired): - raise RuntimeError( - "yc-bench CLI not found. Install with:\n" - ' pip install "hermes-agent[yc-bench]"\n' - "Or: git clone https://github.com/collinear-ai/yc-bench " - "&& cd yc-bench && pip install -e ." 
- ) - print("yc-bench CLI verified.") - - # Build eval matrix: preset x seed - self.all_eval_items = [ - {"preset": preset, "seed": seed} - for preset in self.config.presets - for seed in self.config.seeds - ] - self.iter = 0 - - os.makedirs(self.config.db_dir, exist_ok=True) - self.eval_metrics: List[Tuple[str, float]] = [] - - # Streaming JSONL log for crash-safe result persistence - log_dir = os.path.join(os.path.dirname(__file__), "logs") - os.makedirs(log_dir, exist_ok=True) - run_ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - self._streaming_path = os.path.join(log_dir, f"samples_{run_ts}.jsonl") - self._streaming_file = open(self._streaming_path, "w", encoding="utf-8") - self._streaming_lock = threading.Lock() - - print(f"\nYC-Bench eval matrix: {len(self.all_eval_items)} runs") - for item in self.all_eval_items: - print(f" preset={item['preset']!r} seed={item['seed']}") - print(f"Streaming results to: {self._streaming_path}\n") - - def _save_result(self, result: Dict[str, Any]): - """Write a single run result to the streaming JSONL file immediately.""" - if not hasattr(self, "_streaming_file") or self._streaming_file.closed: - return - with self._streaming_lock: - self._streaming_file.write( - json.dumps(result, ensure_ascii=False, default=str) + "\n" - ) - self._streaming_file.flush() - - # ========================================================================= - # Training pipeline stubs (eval-only -- not used) - # ========================================================================= - - async def get_next_item(self): - item = self.all_eval_items[self.iter % len(self.all_eval_items)] - self.iter += 1 - return item - - def format_prompt(self, item: Dict[str, Any]) -> str: - preset = item["preset"] - seed = item["seed"] - return ( - f"A new YC-Bench simulation has been initialized " - f"(preset='{preset}', seed={seed}).\n" - f"Your company '{self.config.company_name}' is ready.\n\n" - "Begin by calling:\n" - "1. 
`yc-bench company status` -- see your starting funds and prestige\n" - "2. `yc-bench employee list` -- see your team and their skills\n" - "3. `yc-bench market browse --required-prestige-lte 1` -- find tasks " - "you can take\n\n" - "Then accept 2-3 tasks, assign employees, dispatch them, and call " - "`yc-bench sim resume` to advance time. Repeat this loop until the " - "simulation ends (horizon reached or bankruptcy)." - ) - - async def compute_reward(self, item, result, ctx) -> float: - return 0.0 - - async def collect_trajectories(self, item): - return None, [] - - async def score(self, rollout_group_data): - return None - - # ========================================================================= - # Per-run evaluation - # ========================================================================= - - async def rollout_and_score_eval(self, eval_item: Dict[str, Any]) -> Dict: - """ - Evaluate a single (preset, seed) run. - - 1. Sets DATABASE_URL and YC_BENCH_EXPERIMENT env vars - 2. Initialises the simulation via ``yc-bench sim init`` (NOT ``run``) - 3. Runs HermesAgentLoop with terminal tool - 4. Reads SQLite DB to compute final score - 5. 
Returns result dict with survival, funds, and composite score - """ - preset = eval_item["preset"] - seed = eval_item["seed"] - run_id = str(uuid.uuid4())[:8] - run_key = f"{preset}_seed{seed}_{run_id}" - - from tqdm import tqdm - tqdm.write(f" [START] preset={preset!r} seed={seed} (run_id={run_id})") - run_start = time.time() - - # Isolated DB per run -- prevents cross-run state leakage - db_path = os.path.join(self.config.db_dir, f"yc_bench_{run_key}.db") - os.environ["DATABASE_URL"] = f"sqlite:///{db_path}" - os.environ["YC_BENCH_EXPERIMENT"] = preset - - # Determine horizon: explicit config override > preset lookup > default 1 - horizon = self.config.horizon_years or _PRESET_HORIZONS.get(preset, 1) - - try: - # ---------------------------------------------------------- - # Step 1: Initialise the simulation via CLI - # IMPORTANT: We use `sim init`, NOT `yc-bench run`. - # `yc-bench run` starts yc-bench's own LLM agent loop (via - # LiteLLM), which would compete with our HermesAgentLoop. - # `sim init` just sets up the world and returns. 
- # ---------------------------------------------------------- - init_cmd = [ - "yc-bench", "sim", "init", - "--seed", str(seed), - "--start-date", self.config.start_date, - "--company-name", self.config.company_name, - "--horizon-years", str(horizon), - ] - init_result = subprocess.run( - init_cmd, capture_output=True, text=True, timeout=30, - ) - if init_result.returncode != 0: - error_msg = (init_result.stderr or init_result.stdout).strip() - raise RuntimeError(f"yc-bench sim init failed: {error_msg}") - - tqdm.write(f" Simulation initialized (horizon={horizon}yr)") - - # ---------------------------------------------------------- - # Step 2: Run the HermesAgentLoop - # ---------------------------------------------------------- - tools, valid_names = self._resolve_tools_for_group() - - messages: List[Dict[str, Any]] = [ - {"role": "system", "content": YC_BENCH_SYSTEM_PROMPT}, - {"role": "user", "content": self.format_prompt(eval_item)}, - ] - - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=run_id, - temperature=self.config.agent_temperature, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - - # ---------------------------------------------------------- - # Step 3: Read final score from the simulation DB - # ---------------------------------------------------------- - score_data = _read_final_score(db_path) - final_funds = score_data["final_funds_cents"] - survived = score_data["survived"] - terminal_reason = score_data["terminal_reason"] - - composite = _compute_composite_score( - final_funds_cents=final_funds, - survived=survived, - survival_weight=self.config.survival_weight, - funds_weight=self.config.funds_weight, - ) - - elapsed = time.time() - run_start - status = "SURVIVED" if survived else "BANKRUPT" - if final_funds >= 0: - 
funds_str = f"${final_funds / 100:,.0f}" - else: - funds_str = f"-${abs(final_funds) / 100:,.0f}" - - tqdm.write( - f" [{status}] preset={preset!r} seed={seed} " - f"funds={funds_str} score={composite:.3f} " - f"turns={result.turns_used} ({elapsed:.0f}s)" - ) - - out = { - "preset": preset, - "seed": seed, - "survived": survived, - "final_funds_cents": final_funds, - "final_funds_usd": final_funds / 100, - "terminal_reason": terminal_reason, - "composite_score": composite, - "turns_used": result.turns_used, - "finished_naturally": result.finished_naturally, - "elapsed_seconds": elapsed, - "db_path": db_path, - "messages": result.messages, - } - self._save_result(out) - return out - - except Exception as e: - elapsed = time.time() - run_start - logger.error("Run %s failed: %s", run_key, e, exc_info=True) - tqdm.write( - f" [ERROR] preset={preset!r} seed={seed}: {e} ({elapsed:.0f}s)" - ) - out = { - "preset": preset, - "seed": seed, - "survived": False, - "final_funds_cents": 0, - "final_funds_usd": 0.0, - "terminal_reason": f"error: {e}", - "composite_score": 0.0, - "turns_used": 0, - "error": str(e), - "elapsed_seconds": elapsed, - } - self._save_result(out) - return out - - # ========================================================================= - # Evaluate - # ========================================================================= - - async def _run_with_timeout(self, item: Dict[str, Any]) -> Dict: - """Wrap a single rollout with a wall-clock timeout.""" - preset = item["preset"] - seed = item["seed"] - try: - return await asyncio.wait_for( - self.rollout_and_score_eval(item), - timeout=self.config.run_timeout, - ) - except asyncio.TimeoutError: - from tqdm import tqdm - tqdm.write( - f" [TIMEOUT] preset={preset!r} seed={seed} " - f"(exceeded {self.config.run_timeout}s)" - ) - out = { - "preset": preset, - "seed": seed, - "survived": False, - "final_funds_cents": 0, - "final_funds_usd": 0.0, - "terminal_reason": f"timeout ({self.config.run_timeout}s)", - 
"composite_score": 0.0, - "turns_used": 0, - "error": "timeout", - } - self._save_result(out) - return out - - async def evaluate(self, *args, **kwargs) -> None: - """ - Run YC-Bench evaluation over all (preset, seed) combinations. - - Runs sequentially -- each run is 100-500 turns, parallelising would - be prohibitively expensive and cause env var conflicts. - """ - start_time = time.time() - from tqdm import tqdm - - # --- tqdm-compatible logging handler (TB2 pattern) --- - class _TqdmHandler(logging.Handler): - def emit(self, record): - try: - tqdm.write(self.format(record)) - except Exception: - self.handleError(record) - - root = logging.getLogger() - handler = _TqdmHandler() - handler.setFormatter( - logging.Formatter("%(levelname)s %(name)s: %(message)s") - ) - root.handlers = [handler] - for noisy in ("httpx", "openai"): - logging.getLogger(noisy).setLevel(logging.WARNING) - - # --- Print config summary --- - print(f"\n{'='*60}") - print("Starting YC-Bench Evaluation") - print(f"{'='*60}") - print(f" Presets: {self.config.presets}") - print(f" Seeds: {self.config.seeds}") - print(f" Total runs: {len(self.all_eval_items)}") - print(f" Max turns/run: {self.config.max_agent_turns}") - print(f" Run timeout: {self.config.run_timeout}s") - print(f"{'='*60}\n") - - results = [] - pbar = tqdm( - total=len(self.all_eval_items), desc="YC-Bench", dynamic_ncols=True - ) - - try: - for item in self.all_eval_items: - result = await self._run_with_timeout(item) - results.append(result) - survived_count = sum(1 for r in results if r.get("survived")) - pbar.set_postfix_str( - f"survived={survived_count}/{len(results)}" - ) - pbar.update(1) - - except (KeyboardInterrupt, asyncio.CancelledError): - tqdm.write("\n[INTERRUPTED] Stopping evaluation...") - pbar.close() - try: - from tools.terminal_tool import cleanup_all_environments - cleanup_all_environments() - except Exception: - pass - if hasattr(self, "_streaming_file") and not self._streaming_file.closed: - 
self._streaming_file.close() - return - - pbar.close() - end_time = time.time() - - # --- Compute metrics --- - valid = [r for r in results if r is not None] - if not valid: - print("Warning: No valid results.") - return - - total = len(valid) - survived_total = sum(1 for r in valid if r.get("survived")) - survival_rate = survived_total / total if total else 0.0 - avg_score = ( - sum(r.get("composite_score", 0) for r in valid) / total - if total - else 0.0 - ) - - preset_results: Dict[str, List[Dict]] = defaultdict(list) - for r in valid: - preset_results[r["preset"]].append(r) - - eval_metrics = { - "eval/survival_rate": survival_rate, - "eval/avg_composite_score": avg_score, - "eval/total_runs": total, - "eval/survived_runs": survived_total, - "eval/evaluation_time_seconds": end_time - start_time, - } - - for preset, items in sorted(preset_results.items()): - ps = sum(1 for r in items if r.get("survived")) - pt = len(items) - pa = ( - sum(r.get("composite_score", 0) for r in items) / pt - if pt - else 0 - ) - key = preset.replace("-", "_") - eval_metrics[f"eval/survival_rate_{key}"] = ps / pt if pt else 0 - eval_metrics[f"eval/avg_score_{key}"] = pa - - self.eval_metrics = list(eval_metrics.items()) - - # --- Print summary --- - print(f"\n{'='*60}") - print("YC-Bench Evaluation Results") - print(f"{'='*60}") - print( - f"Overall survival rate: {survival_rate:.1%} " - f"({survived_total}/{total})" - ) - print(f"Average composite score: {avg_score:.4f}") - print(f"Evaluation time: {end_time - start_time:.1f}s") - - print("\nPer-preset breakdown:") - for preset, items in sorted(preset_results.items()): - ps = sum(1 for r in items if r.get("survived")) - pt = len(items) - pa = ( - sum(r.get("composite_score", 0) for r in items) / pt - if pt - else 0 - ) - print(f" {preset}: {ps}/{pt} survived avg_score={pa:.4f}") - for r in items: - status = "SURVIVED" if r.get("survived") else "BANKRUPT" - funds = r.get("final_funds_usd", 0) - print( - f" seed={r['seed']} [{status}] 
" - f"${funds:,.0f} " - f"score={r.get('composite_score', 0):.3f}" - ) - - print(f"{'='*60}\n") - - # --- Log results --- - samples = [ - {k: v for k, v in r.items() if k != "messages"} for r in valid - ] - - try: - await self.evaluate_log( - metrics=eval_metrics, - samples=samples, - start_time=start_time, - end_time=end_time, - generation_parameters={ - "temperature": self.config.agent_temperature, - "max_tokens": self.config.max_token_length, - "max_agent_turns": self.config.max_agent_turns, - }, - ) - except Exception as e: - print(f"Error logging results: {e}") - - # --- Cleanup (TB2 pattern) --- - if hasattr(self, "_streaming_file") and not self._streaming_file.closed: - self._streaming_file.close() - print(f"Results saved to: {self._streaming_path}") - - try: - from tools.terminal_tool import cleanup_all_environments - cleanup_all_environments() - except Exception: - pass - - try: - from environments.agent_loop import _tool_executor - _tool_executor.shutdown(wait=False, cancel_futures=True) - except Exception: - pass - - # ========================================================================= - # Wandb logging - # ========================================================================= - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None): - """Log YC-Bench-specific metrics to wandb.""" - if wandb_metrics is None: - wandb_metrics = {} - for k, v in self.eval_metrics: - wandb_metrics[k] = v - self.eval_metrics = [] - await super().wandb_log(wandb_metrics) - - -if __name__ == "__main__": - YCBenchEvalEnv.cli() diff --git a/environments/hermes_base_env.py b/environments/hermes_base_env.py deleted file mode 100644 index adefa9b7c3cf..000000000000 --- a/environments/hermes_base_env.py +++ /dev/null @@ -1,714 +0,0 @@ -""" -HermesAgentBaseEnv -- Abstract Base Environment for Hermes-Agent + Atropos - -Provides the Atropos integration plumbing that all hermes-agent environments share: -- Two-mode operation (OpenAI server for Phase 1, VLLM 
ManagedServer for Phase 2) -- Per-group toolset/distribution resolution -- Agent loop orchestration via HermesAgentLoop -- ToolContext creation for reward functions -- ScoredDataGroup construction from ManagedServer state - -Subclasses only need to implement: - setup() -- Load dataset, initialize state - get_next_item() -- Return the next item from the dataset - format_prompt() -- Convert a dataset item into the user message - compute_reward() -- Score the rollout (has full ToolContext access) - evaluate() -- Periodic evaluation -""" - -import asyncio -import json -import logging -import os -import sys -import uuid -from abc import abstractmethod -from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Tuple, Union - -# Ensure the hermes-agent repo root is on sys.path so that imports like -# `from model_tools import ...` and `from environments.X import ...` work -# regardless of where the script is invoked from. -_repo_root = Path(__file__).resolve().parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from dotenv import load_dotenv -from pydantic import Field - -# Load API keys from hermes-agent/.env so all environments can access them -_env_path = _repo_root / ".env" -if _env_path.exists(): - load_dotenv(dotenv_path=_env_path) - -# Apply monkey patches for async-safe tool operation inside Atropos's event loop. -# This patches SwerexModalEnvironment to use a background thread instead of -# asyncio.run(), which would deadlock inside Atropos. Safe for normal CLI too. 
-from environments.patches import apply_patches -apply_patches() - -from atroposlib.envs.base import ( - BaseEnv, - BaseEnvConfig, - ScoredDataGroup, - ScoredDataItem, -) -from atroposlib.envs.server_handling.server_manager import ( - APIServerConfig, - ServerBaseline, - ServerManager, -) -from atroposlib.type_definitions import Item - -from environments.agent_loop import AgentResult, HermesAgentLoop -from environments.tool_context import ToolContext -from tools.budget_config import ( - DEFAULT_RESULT_SIZE_CHARS, - DEFAULT_TURN_BUDGET_CHARS, - DEFAULT_PREVIEW_SIZE_CHARS, -) - -# Import hermes-agent toolset infrastructure -from model_tools import get_tool_definitions -from toolset_distributions import sample_toolsets_from_distribution - -logger = logging.getLogger(__name__) - - -class HermesAgentEnvConfig(BaseEnvConfig): - """ - Configuration for hermes-agent Atropos environments. - - Extends BaseEnvConfig with agent-specific settings for toolsets, - terminal backend, dataset loading, and tool call parsing. - """ - - # --- Toolset configuration --- - # Mutually exclusive: use either enabled_toolsets OR distribution - enabled_toolsets: Optional[List[str]] = Field( - default=None, - description="Explicit list of hermes toolsets to enable (e.g., ['terminal', 'file', 'web']). " - "If None and distribution is also None, all available toolsets are enabled.", - ) - disabled_toolsets: Optional[List[str]] = Field( - default=None, - description="Toolsets to disable. Applied as a filter on top of enabled_toolsets or distribution.", - ) - distribution: Optional[str] = Field( - default=None, - description="Name of a toolset distribution from toolset_distributions.py " - "(e.g., 'development', 'terminal_tasks'). Sampled once per group. 
" - "Mutually exclusive with enabled_toolsets.", - ) - - # --- Agent loop configuration --- - max_agent_turns: int = Field( - default=30, - description="Maximum number of LLM calls (tool-calling iterations) per rollout.", - ) - system_prompt: Optional[str] = Field( - default=None, - description="System prompt for the agent. Tools are handled via the tools= parameter, " - "not embedded in the prompt text.", - ) - agent_temperature: float = Field( - default=1.0, - description="Sampling temperature for agent generation during rollouts.", - ) - - # --- Terminal backend --- - terminal_backend: str = Field( - default="local", - description="Terminal backend: 'local', 'docker', 'modal', 'daytona', 'ssh', 'singularity'. " - "Modal or Daytona recommended for production RL (cloud isolation per rollout).", - ) - terminal_timeout: int = Field( - default=120, - description="Per-command timeout in seconds for terminal tool calls. " - "Commands exceeding this are killed. Increase for tasks with long-running " - "commands (compilation, pip install, etc.).", - ) - terminal_lifetime: int = Field( - default=3600, - description="Sandbox inactivity lifetime in seconds. The cleanup thread kills " - "sandboxes that have been idle longer than this. Must be longer than " - "the longest gap between tool calls (e.g., waiting for LLM response).", - ) - - # --- Dataset --- - dataset_name: Optional[str] = Field( - default=None, - description="HuggingFace dataset name. Optional if tasks are defined inline.", - ) - dataset_split: str = Field( - default="train", - description="Dataset split to use.", - ) - prompt_field: str = Field( - default="prompt", - description="Which field in the dataset contains the prompt.", - ) - - # --- Thread pool --- - tool_pool_size: int = Field( - default=128, - description="Thread pool size for tool execution. Each concurrent task needs a " - "thread for tool calls. Must be large enough for parallel evaluation. 
" - "Too small = thread pool starvation.", - ) - - # --- Phase 2: Tool call parsing --- - tool_call_parser: str = Field( - default="hermes", - description="Tool call parser name for Phase 2 (VLLM server type). " - "Ignored in Phase 1 (OpenAI server type where VLLM parses natively). " - "Options: hermes, mistral, llama3_json, qwen, deepseek_v3, etc.", - ) - - # --- Tool result budget --- - # Defaults imported from tools.budget_config (single source of truth). - default_result_size_chars: int = Field( - default=DEFAULT_RESULT_SIZE_CHARS, - description="Default per-tool threshold (chars) for persisting large results " - "to sandbox. Results exceeding this are written to /tmp/hermes-results/ " - "and replaced with a preview. Per-tool registry values take precedence " - "unless overridden via tool_result_overrides.", - ) - turn_budget_chars: int = Field( - default=DEFAULT_TURN_BUDGET_CHARS, - description="Aggregate char budget per assistant turn. If all tool results " - "in a single turn exceed this, the largest are persisted to disk first.", - ) - preview_size_chars: int = Field( - default=DEFAULT_PREVIEW_SIZE_CHARS, - description="Size of the inline preview shown after a tool result is persisted.", - ) - tool_result_overrides: Optional[Dict[str, int]] = Field( - default=None, - description="Per-tool threshold overrides (chars). Keys are tool names, " - "values are char thresholds. Overrides both the default and registry " - "per-tool values. Example: {'terminal': 10000, 'search_files': 5000}. " - "Note: read_file is pinned to infinity and cannot be overridden.", - ) - - # --- Provider-specific parameters --- - # Passed as extra_body to the OpenAI client's chat.completions.create() call. - # Useful for OpenRouter provider preferences, transforms, route settings, etc. 
- # Example YAML: - # extra_body: - # provider: - # ignore: ["DeepInfra", "Fireworks"] - # order: ["Together"] - # transforms: ["middle-out"] - extra_body: Optional[Dict[str, Any]] = Field( - default=None, - description="Extra body parameters passed to the OpenAI client's " - "chat.completions.create(). Used for OpenRouter provider preferences, " - "transforms, and other provider-specific settings.", - ) - - def build_budget_config(self): - """Build a BudgetConfig from env config fields.""" - from tools.budget_config import BudgetConfig - return BudgetConfig( - default_result_size=self.default_result_size_chars, - turn_budget=self.turn_budget_chars, - preview_size=self.preview_size_chars, - tool_overrides=dict(self.tool_result_overrides) if self.tool_result_overrides else {}, - ) - - -class HermesAgentBaseEnv(BaseEnv): - """ - Abstract base environment for hermes-agent Atropos integration. - - Handles two modes of operation: - - Phase 1 (OpenAI server type): Uses server.chat_completion() directly. - The server (VLLM, SGLang, OpenRouter, OpenAI) handles tool call parsing - and reasoning extraction natively. DummyManagedServer provides placeholder - tokens. Good for SFT data gen, verifier testing, evaluation. - - - Phase 2 (VLLM server type): Uses ManagedServer for exact token IDs + logprobs - via /generate. Client-side tool call parser reconstructs structured tool_calls - from raw output. Full RL training capability. 
- - Subclasses must implement: - setup() -- Load dataset, initialize state - get_next_item() -- Return the next item to roll out - format_prompt() -- Convert a dataset item into the user message string - compute_reward() -- Score the rollout using ToolContext - evaluate() -- Periodic evaluation - """ - - name: Optional[str] = "hermes-agent" - env_config_cls = HermesAgentEnvConfig - - def __init__( - self, - config: HermesAgentEnvConfig, - server_configs: Union[ServerBaseline, List[APIServerConfig]], - slurm=False, - testing=False, - ): - super().__init__(config, server_configs, slurm, testing) - - # Set terminal environment variables so hermes tools pick them up. - # These can all be overridden per-environment via config fields instead - # of requiring users to set shell env vars. - if config.terminal_backend: - os.environ["TERMINAL_ENV"] = config.terminal_backend - os.environ["TERMINAL_TIMEOUT"] = str(config.terminal_timeout) - os.environ["TERMINAL_LIFETIME_SECONDS"] = str(config.terminal_lifetime) - print( - f"๐Ÿ–ฅ๏ธ Terminal: backend={config.terminal_backend}, " - f"timeout={config.terminal_timeout}s, lifetime={config.terminal_lifetime}s" - ) - - # Resize the agent loop's thread pool for tool execution. - # This must be large enough for the number of concurrent tasks - # (e.g., 89 parallel TB2 eval tasks each need a thread for tool calls). - from environments.agent_loop import resize_tool_pool - resize_tool_pool(config.tool_pool_size) - - # Set tool_parser on the ServerManager so ManagedServer uses it - # for bidirectional tool call translation (raw text โ†” OpenAI tool_calls). 
- if hasattr(self.server, 'tool_parser'): - self.server.tool_parser = config.tool_call_parser - print(f"๐Ÿ”ง Tool parser: {config.tool_call_parser}") - - # Current group's resolved tools (set in collect_trajectories) - self._current_group_tools: Optional[Tuple[List[Dict], Set[str]]] = None - - # Tool error tracking for wandb logging - self._tool_error_buffer: List[Dict[str, Any]] = [] - - # ========================================================================= - # Toolset resolution (per-group) - # ========================================================================= - - def _resolve_tools_for_group(self) -> Tuple[List[Dict[str, Any]], Set[str]]: - """ - Resolve toolsets for a group. Called once in collect_trajectories(), - then shared by all collect_trajectory() calls in the group. - - If distribution is set, samples probabilistically. - If enabled_toolsets is set, uses that explicit list. - disabled_toolsets is applied as a filter on top. - - Returns: - (tool_schemas, valid_tool_names) tuple - """ - config = self.config - - if config.distribution: - group_toolsets = sample_toolsets_from_distribution(config.distribution) - logger.info("Sampled toolsets from '%s': %s", config.distribution, group_toolsets) - else: - group_toolsets = config.enabled_toolsets # None means "all available" - if group_toolsets is None: - logger.warning( - "enabled_toolsets is None -- loading ALL tools including messaging. " - "Set explicit enabled_toolsets for RL training." 
- ) - - tools = get_tool_definitions( - enabled_toolsets=group_toolsets, - disabled_toolsets=config.disabled_toolsets, - quiet_mode=True, - ) - - valid_names = {t["function"]["name"] for t in tools} if tools else set() - logger.info("Resolved %d tools for group: %s", len(valid_names), sorted(valid_names)) - return tools, valid_names - - # ========================================================================= - # Server mode detection - # ========================================================================= - - def _use_managed_server(self) -> bool: - """ - Determine if we should use ManagedServer (Phase 2) or direct server (Phase 1). - - Phase 2 (ManagedServer) is used when the server type is 'vllm' or 'sglang', - which go through the /generate endpoint for exact token tracking. - - Phase 1 (direct server) is used for 'openai' server type, which uses - /v1/chat/completions with native tool call parsing. - """ - if not self.server.servers: - return False - - server = self.server.servers[0] - # If the server is an OpenAI server (not VLLM/SGLang), use direct mode - from atroposlib.envs.server_handling.openai_server import OpenAIServer - return not isinstance(server, OpenAIServer) - - # ========================================================================= - # Core Atropos integration - # ========================================================================= - - async def collect_trajectories( - self, item: Item - ) -> Tuple[ - Union[Optional[ScoredDataGroup], List[Optional[ScoredDataGroup]]], - List[Item], - ]: - """ - Override collect_trajectories to resolve toolsets once per group, - then delegate to the standard group-level collection. - - The default BaseEnv.collect_trajectories() calls collect_trajectory() - group_size times in parallel. We resolve tools once here and store - them for all those calls to use. 
- """ - # Resolve toolsets for this group (shared by all rollouts in the group) - self._current_group_tools = self._resolve_tools_for_group() - - # Delegate to the default implementation which calls collect_trajectory() - # group_size times via asyncio.gather - return await super().collect_trajectories(item) - - # ========================================================================= - # Wandb rollout display -- format trajectories nicely - # ========================================================================= - - @staticmethod - def _format_trajectory_for_display(messages: List[Dict[str, Any]]) -> str: - """ - Format a conversation's messages into a readable trajectory string - for wandb rollout tables. Shows tool calls, tool results, and reasoning - in a structured way instead of raw token decoding. - """ - parts = [] - for msg in messages: - role = msg.get("role", "unknown") - content = msg.get("content", "") - - if role == "system": - parts.append(f"[SYSTEM]\n{content}") - - elif role == "user": - parts.append(f"[USER]\n{content}") - - elif role == "assistant": - # Show reasoning if present - reasoning = msg.get("reasoning_content", "") - if reasoning: - # Truncate long reasoning for display - if len(reasoning) > 300: - reasoning = reasoning[:300] + "..." - parts.append(f"[ASSISTANT thinking]\n{reasoning}") - - # Show content - if content: - parts.append(f"[ASSISTANT]\n{content}") - - # Show tool calls - tool_calls = msg.get("tool_calls", []) - for tc in tool_calls: - func = tc.get("function", {}) - name = func.get("name", "?") - args = func.get("arguments", "{}") - # Truncate long arguments for display - if len(args) > 200: - args = args[:200] + "..." - parts.append(f"[TOOL CALL] {name}({args})") - - elif role == "tool": - tool_id = msg.get("tool_call_id", "") - result = content - # Truncate long tool results for display - if len(result) > 500: - result = result[:500] + "..." 
- parts.append(f"[TOOL RESULT] {result}") - - return "\n\n".join(parts) - - async def add_rollouts_for_wandb( - self, - scored_data, - item=None, - ): - """ - Override to show formatted trajectories with tool calls visible, - instead of raw token decoding which loses all structure. - """ - num_keep = self.config.num_rollouts_per_group_for_logging - if num_keep == -1: - num_keep = self.config.group_size - - group = [] - for i in range(min(num_keep, len(scored_data.get("scores", [])))): - score = scored_data["scores"][i] - - # Use messages if available for rich display - messages = None - if scored_data.get("messages") and i < len(scored_data["messages"]): - messages = scored_data["messages"][i] - - if messages: - text = self._format_trajectory_for_display(messages) - elif scored_data.get("tokens") and i < len(scored_data["tokens"]): - text = self.tokenizer.decode(scored_data["tokens"][i]) - else: - text = "(no data)" - - group.append((text, score)) - - self.rollouts_for_wandb.append(group) - if len(self.rollouts_for_wandb) > self.config.num_rollouts_to_keep: - self.rollouts_for_wandb.pop(0) - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None): - """Log base metrics including tool errors to wandb.""" - if wandb_metrics is None: - wandb_metrics = {} - - # Log tool error stats - if self._tool_error_buffer: - wandb_metrics["train/tool_errors_count"] = len(self._tool_error_buffer) - - # Log error details as a summary string (tables can crash wandb on tmp cleanup) - error_summaries = [] - for err in self._tool_error_buffer: - error_summaries.append( - f"[turn {err['turn']}] {err['tool']}({err['args'][:80]}) -> {err['error'][:150]}" - ) - wandb_metrics["train/tool_error_details"] = "\n".join(error_summaries) - - # Also print to stdout for immediate visibility - for summary in error_summaries: - print(f" Tool Error: {summary}") - - self._tool_error_buffer = [] - else: - wandb_metrics["train/tool_errors_count"] = 0 - - await super().wandb_log(wandb_metrics) - 
- async def collect_trajectory( - self, item: Item - ) -> Tuple[Optional[Union[ScoredDataItem, Any]], List[Item]]: - """ - Run a single rollout: agent loop + reward computation. - - This is called group_size times in parallel by collect_trajectories(). - Each call gets its own task_id for terminal/browser session isolation. - """ - task_id = str(uuid.uuid4()) - - # Get group-level tools (resolved once in collect_trajectories) - if self._current_group_tools is None: - # Fallback: resolve per-trajectory if called outside collect_trajectories - tools, valid_names = self._resolve_tools_for_group() - else: - tools, valid_names = self._current_group_tools - - # Build initial messages - messages: List[Dict[str, Any]] = [] - if self.config.system_prompt: - messages.append({"role": "system", "content": self.config.system_prompt}) - messages.append({"role": "user", "content": self.format_prompt(item)}) - - # Run the agent loop - result: AgentResult - if self._use_managed_server(): - # Phase 2: ManagedServer with ToolCallTranslator -- exact tokens + logprobs - # tool_parser is set on ServerManager in __init__ and passed through - # to ManagedServer, which uses ToolCallTranslator for bidirectional - # translation between raw text and OpenAI tool_calls. - try: - async with self.server.managed_server( - tokenizer=self.tokenizer, - preserve_think_blocks=bool(self.config.thinking_mode), - ) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=self.config.agent_temperature, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - except NotImplementedError: - # DummyManagedServer not allowed -- fall back to Phase 1 - logger.warning( - "ManagedServer not available (OpenAI server?). " - "Falling back to direct server mode." 
- ) - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=self.config.agent_temperature, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - else: - # Phase 1: OpenAI server -- native tool_calls, placeholder tokens - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=self.config.agent_temperature, - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - - # Skip reward computation if the agent loop produced no meaningful work - # (e.g., API call failed on turn 1). No point spinning up a Modal sandbox - # just to verify files that were never created. - only_system_and_user = all( - msg.get("role") in {"system", "user"} for msg in result.messages - ) - if result.turns_used == 0 or only_system_and_user: - logger.warning( - "Agent loop produced no output (turns=%d, msgs=%d). 
Skipping reward.", - result.turns_used, len(result.messages), - ) - reward = 0.0 - else: - # Compute reward using ToolContext (gives verifier full tool access) - ctx = ToolContext(task_id) - try: - reward = await self.compute_reward(item, result, ctx) - except Exception as e: - logger.error("compute_reward failed: %s", e) - reward = 0.0 - finally: - ctx.cleanup() - - # Track tool errors for wandb logging - if result.tool_errors: - for err in result.tool_errors: - self._tool_error_buffer.append({ - "turn": err.turn, - "tool": err.tool_name, - "args": err.arguments[:150], - "error": err.error[:300], - "result": err.tool_result[:300], - }) - - # Build ScoredDataItem from ManagedServer state - # Phase 2: real tokens/masks/logprobs from SequenceNodes - # Phase 1: placeholder tokens (still need a valid ScoredDataItem for the pipeline) - nodes = (result.managed_state or {}).get("nodes", []) - - if nodes: - # Phase 2 (or DummyManagedServer): use actual node data - node = nodes[-1] # Final sequence node = full trajectory - scored_item: Dict[str, Any] = { - "tokens": node.tokens, - "masks": node.masked_tokens, - "scores": reward, - } - - # Include logprobs if available (Phase 2) - if hasattr(node, "logprobs") and node.logprobs: - scored_item["advantages"] = None # Computed by trainer - scored_item["ref_logprobs"] = None - else: - # Phase 1 with no managed state: create placeholder tokens - # so the data pipeline doesn't break. These are NOT suitable - # for training but allow process mode (SFT data gen) to work. - # Tokenize the full conversation to get approximate tokens. 
- full_text = "\n".join( - msg.get("content", "") for msg in result.messages if msg.get("content") - ) - if self.tokenizer: - tokens = self.tokenizer.encode(full_text, add_special_tokens=True) - else: - tokens = list(range(min(len(full_text) // 4, 128))) - - scored_item = { - "tokens": tokens, - "masks": [-100] + tokens[1:], # Mask first token as prompt - "scores": reward, - } - - # Always include messages for wandb rollout display and data logging - scored_item["messages"] = result.messages - - return scored_item, [] - - # ========================================================================= - # Abstract methods -- subclasses must implement - # ========================================================================= - - @abstractmethod - async def setup(self): - """ - Load dataset, initialize state. - - Called once when the environment starts. Typical implementation: - self.dataset = load_dataset(self.config.dataset_name, split=self.config.dataset_split) - self.iter = 0 - """ - raise NotImplementedError - - @abstractmethod - async def get_next_item(self) -> Item: - """ - Return the next item from the dataset for rollout. - - Called by the base env's main loop to get items for workers. - Should cycle through the dataset. - """ - raise NotImplementedError - - @abstractmethod - def format_prompt(self, item: Item) -> str: - """ - Convert a dataset item into the user message for the agent. - - Args: - item: Dataset item (dict, tuple, etc.) - - Returns: - The prompt string to send to the agent - """ - raise NotImplementedError - - @abstractmethod - async def compute_reward( - self, item: Item, result: AgentResult, ctx: ToolContext - ) -> float: - """ - Score the rollout. Has full access to: - - item: the original dataset item (ground truth, test commands, etc.) - - result: AgentResult with full messages, turn count, reasoning, etc. - - ctx: ToolContext -- call ANY hermes-agent tool (terminal, file, web, - browser, vision...) scoped to this rollout's sandbox. 
Nothing - is off-limits. - - Args: - item: The dataset item that was rolled out - result: The agent's rollout result - ctx: ToolContext with full tool access for verification - - Returns: - Reward float (typically 0.0 to 1.0, but any float is valid) - """ - raise NotImplementedError - - @abstractmethod - async def evaluate(self, *args, **kwargs): - """ - Periodic evaluation. Called every steps_per_eval steps. - - Typical implementation runs the agent on a held-out eval set - and logs metrics via wandb/evaluate_log. - """ - raise NotImplementedError diff --git a/environments/hermes_swe_env/__init__.py b/environments/hermes_swe_env/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/environments/hermes_swe_env/default.yaml b/environments/hermes_swe_env/default.yaml deleted file mode 100644 index 2d0113345f88..000000000000 --- a/environments/hermes_swe_env/default.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# SWE Environment -- Default Configuration -# -# SWE-bench style tasks with Modal sandboxes for cloud isolation. -# Uses terminal + file + web toolsets. -# -# Usage: -# python environments/hermes_swe_env/hermes_swe_env.py serve \ -# --config environments/hermes_swe_env/default.yaml - -env: - enabled_toolsets: ["terminal", "file", "web"] - max_agent_turns: 30 - max_token_length: 4096 - group_size: 4 - terminal_backend: "modal" - tool_call_parser: "hermes" - tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview" - dataset_name: "bigcode/humanevalpack" - dataset_split: "test" - prompt_field: "prompt" - steps_per_eval: 50 - total_steps: 500 - use_wandb: true - wandb_name: "hermes-swe" - system_prompt: > - You are a skilled software engineer. You have access to a terminal, - file tools, and web search. Use these tools to complete the coding task. - Write clean, working code and verify it runs correctly before finishing. 
- -openai: - base_url: "http://localhost:8000/v1" - model_name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview" - server_type: "openai" - api_key: "" diff --git a/environments/hermes_swe_env/hermes_swe_env.py b/environments/hermes_swe_env/hermes_swe_env.py deleted file mode 100644 index 49c521e5f76e..000000000000 --- a/environments/hermes_swe_env/hermes_swe_env.py +++ /dev/null @@ -1,229 +0,0 @@ -""" -HermesSweEnv -- SWE-Bench Style Environment with Modal Sandboxes - -A concrete environment for software engineering tasks where the model writes code -and the reward function runs tests to verify correctness. Uses Modal terminal -backend for cloud-isolated sandboxes per rollout. - -The reward function uses ToolContext.terminal() to run test commands in the same -Modal sandbox the model used during its agentic loop. All filesystem state from -the model's tool calls is preserved for verification. - -Usage: - # Phase 1: OpenAI server type - vllm serve YourModel --tool-parser hermes - run-api - python environments/hermes_swe_env.py serve \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name YourModel \\ - --openai.server_type openai \\ - --env.dataset_name bigcode/humanevalpack \\ - --env.terminal_backend modal - - # Phase 2: VLLM server type (full RL training) - python environments/hermes_swe_env.py serve \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name YourModel \\ - --openai.server_type vllm \\ - --env.tool_call_parser hermes \\ - --env.terminal_backend modal -""" - -import logging -import sys -import time -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union - -# Ensure repo root is on sys.path for imports -_repo_root = Path(__file__).resolve().parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from datasets import load_dataset - -from atroposlib.envs.base import ScoredDataGroup -from atroposlib.envs.server_handling.server_manager import 
APIServerConfig -from atroposlib.type_definitions import Item - -from environments.agent_loop import AgentResult -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -from environments.tool_context import ToolContext - -logger = logging.getLogger(__name__) - - -class HermesSweEnvConfig(HermesAgentEnvConfig): - """Config with defaults for SWE-bench style tasks.""" - - pass # Inherits all fields, overrides defaults in config_init - - -class HermesSweEnv(HermesAgentBaseEnv): - """ - SWE-bench style environment using Modal terminal backend. - - The model gets a coding task, uses terminal + file + web tools to solve it, - and the reward function runs tests in the same Modal sandbox to verify. - - Subclass this for specific SWE datasets (HumanEval, SWE-bench, etc.) - and customize format_prompt() and compute_reward() as needed. - """ - - name = "hermes-swe" - env_config_cls = HermesSweEnvConfig - - @classmethod - def config_init(cls) -> Tuple[HermesSweEnvConfig, List[APIServerConfig]]: - """ - Default configuration for the SWE environment. - - Uses Modal terminal backend for cloud isolation and terminal + file + web toolsets. - """ - env_config = HermesSweEnvConfig( - # Toolsets: terminal for running code, file for reading/writing, web for docs - enabled_toolsets=["terminal", "file", "web"], - disabled_toolsets=None, - distribution=None, - # Agent settings -- SWE tasks need more turns - max_agent_turns=30, - max_token_length=4096, - agent_temperature=1.0, - system_prompt=( - "You are a skilled software engineer. You have access to a terminal, " - "file tools, and web search. Use these tools to complete the coding task. " - "Write clean, working code and verify it runs correctly before finishing." 
- ), - # Modal backend for cloud-isolated sandboxes - terminal_backend="modal", - # Dataset -- override via CLI for your specific SWE dataset - dataset_name="bigcode/humanevalpack", - dataset_split="test", - prompt_field="prompt", - # Atropos settings - group_size=4, - tokenizer_name="NousResearch/DeepHermes-3-Llama-3-3B-Preview", - tool_call_parser="hermes", - steps_per_eval=50, - total_steps=500, - use_wandb=True, - wandb_name="hermes-swe", - ) - - server_configs = [ - APIServerConfig( - base_url="http://localhost:8000/v1", - model_name="NousResearch/DeepHermes-3-Llama-3-3B-Preview", - server_type="openai", # Phase 1; switch to "vllm" for Phase 2 - api_key="", - ) - ] - - return env_config, server_configs - - async def setup(self): - """Load the SWE dataset.""" - if self.config.dataset_name: - self.dataset = load_dataset( - self.config.dataset_name, split=self.config.dataset_split - ) - else: - # Placeholder if no dataset specified - self.dataset = [] - self.iter = 0 - self.reward_buffer: List[float] = [] - - async def get_next_item(self) -> Dict[str, Any]: - """Cycle through the SWE dataset.""" - if not self.dataset: - raise ValueError("No dataset loaded. Set dataset_name in config.") - item = self.dataset[self.iter % len(self.dataset)] - self.iter += 1 - return item - - def format_prompt(self, item: Dict[str, Any]) -> str: - """ - Format the SWE task prompt. - - Override this in subclasses for different dataset formats. - Default assumes the dataset has a 'prompt' field and optionally a 'test' field. - """ - prompt = item.get(self.config.prompt_field, "") - - # If the dataset has test information, include it in the prompt - test_info = item.get("test", item.get("test_code", item.get("tests", ""))) - if test_info: - prompt += f"\n\nTests to pass:\n{test_info}" - - return prompt - - async def compute_reward( - self, item: Dict[str, Any], result: AgentResult, ctx: ToolContext - ) -> float: - """ - Score by running tests in the model's Modal sandbox. 
- - Default implementation: - - If the dataset item has a 'test' or 'test_code' field, run it - - Check exit code: 0 = pass, non-zero = fail - - Partial credit for file creation - - Override this in subclasses for more sophisticated reward logic. - """ - # Find the test command from the dataset item - test_code = item.get("test", item.get("test_code", item.get("tests", ""))) - - if test_code: - # Run the test in the model's sandbox - test_result = ctx.terminal( - f'cd /workspace && python3 -c "{test_code}"', timeout=60 - ) - - if test_result["exit_code"] == 0: - self.reward_buffer.append(1.0) - return 1.0 - - # Partial credit: check if the model created any Python files - file_check = ctx.terminal("find /workspace -name '*.py' -newer /tmp/.start_marker 2>/dev/null | head -5") - if file_check["exit_code"] == 0 and file_check.get("output", "").strip(): - self.reward_buffer.append(0.1) - return 0.1 - - self.reward_buffer.append(0.0) - return 0.0 - - async def evaluate(self, *args, **kwargs): - """ - Run evaluation on a held-out set. - - Override for dataset-specific evaluation logic. 
- """ - start_time = time.time() - end_time = time.time() - - eval_metrics = {"eval/placeholder": 0.0} - await self.evaluate_log( - metrics=eval_metrics, - start_time=start_time, - end_time=end_time, - ) - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None): - """Log SWE-specific metrics.""" - if wandb_metrics is None: - wandb_metrics = {} - - if self.reward_buffer: - wandb_metrics["train/avg_reward"] = sum(self.reward_buffer) / len( - self.reward_buffer - ) - wandb_metrics["train/pass_rate"] = sum( - 1 for r in self.reward_buffer if r == 1.0 - ) / len(self.reward_buffer) - self.reward_buffer = [] - - await super().wandb_log(wandb_metrics) - - -if __name__ == "__main__": - HermesSweEnv.cli() diff --git a/environments/patches.py b/environments/patches.py deleted file mode 100644 index a5afe751ece2..000000000000 --- a/environments/patches.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -Monkey patches for making hermes-agent tools work inside async frameworks (Atropos). - -Problem: - Some tools use asyncio.run() internally (e.g., Modal backend via SWE-ReX, - web_extract). This crashes when called from inside Atropos's event loop because - asyncio.run() can't be nested. - -Solution: - The Modal environment (tools/environments/modal.py) now uses a dedicated - _AsyncWorker thread internally, making it safe for both CLI and Atropos use. - No monkey-patching is required. - - This module is kept for backward compatibility. apply_patches() is a no-op. - -Usage: - Call apply_patches() once at import time (done automatically by hermes_base_env.py). - This is idempotent and safe to call multiple times. 
-""" - -import logging - -logger = logging.getLogger(__name__) - -_patches_applied = False - - -def apply_patches(): - """Apply all monkey patches needed for Atropos compatibility.""" - global _patches_applied - if _patches_applied: - return - - logger.debug("apply_patches() called; no patches needed (async safety is built-in)") - _patches_applied = True diff --git a/environments/terminal_test_env/__init__.py b/environments/terminal_test_env/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/environments/terminal_test_env/default.yaml b/environments/terminal_test_env/default.yaml deleted file mode 100644 index dc971071c3a8..000000000000 --- a/environments/terminal_test_env/default.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Terminal Test Environment -- Default Configuration -# -# Simple file-creation tasks for validating the full Atropos + hermes-agent stack. -# Uses Modal terminal backend and OpenRouter (Claude) for inference. -# API keys loaded from ~/hermes-agent/.env -# -# Usage: -# run-api -# python environments/terminal_test_env/terminal_test_env.py serve \ -# --config environments/terminal_test_env/default.yaml - -env: - enabled_toolsets: ["terminal", "file"] - max_agent_turns: 10 - max_token_length: 2048 - group_size: 3 - total_steps: 3 - steps_per_eval: 3 - terminal_backend: "modal" - tool_call_parser: "hermes" - tokenizer_name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview" - ensure_scores_are_not_same: false - use_wandb: false - system_prompt: > - You are a helpful assistant with access to a terminal and file tools. - Complete the user's request by using the available tools. - Be precise and follow instructions exactly. 
- -openai: - base_url: "https://openrouter.ai/api/v1" - model_name: "anthropic/claude-opus-4.6" - server_type: "openai" - health_check: false - # api_key loaded from OPENROUTER_API_KEY in .env diff --git a/environments/terminal_test_env/terminal_test_env.py b/environments/terminal_test_env/terminal_test_env.py deleted file mode 100644 index 4d151ee7b76e..000000000000 --- a/environments/terminal_test_env/terminal_test_env.py +++ /dev/null @@ -1,292 +0,0 @@ -""" -TerminalTestEnv -- Simple Test Environment for Validating the Stack - -A self-contained environment with inline tasks (no external dataset needed). -Each task asks the model to create a file at a known path with specific content. -The reward verifier cats the file and checks if the content matches. - -Enables only terminal + file toolsets. Uses Modal terminal backend with -OpenRouter (Claude) by default. - -Training tasks (3): - 1. Create ~/greeting.txt with "Hello from Hermes Agent" - 2. Create ~/count.txt with numbers 1-5, one per line - 3. Create ~/answer.txt with the result of 123 + 456 - -Eval task (1): - 1. 
Create ~/result.txt with the result of 6 * 7 - -Usage: - # Start Atropos API server - run-api - - # Run environment (uses OpenRouter + Modal by default) - python environments/terminal_test_env.py serve - - # Process mode (no run-api needed, saves to JSONL) - python environments/terminal_test_env.py process \\ - --env.data_path_to_save_groups terminal_test_output.jsonl -""" - -import logging -import os -import sys -import time -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union - -# Ensure repo root is on sys.path for imports -_repo_root = Path(__file__).resolve().parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -from atroposlib.envs.base import ScoredDataGroup -from atroposlib.envs.server_handling.server_manager import APIServerConfig -from atroposlib.type_definitions import Item - -from environments.agent_loop import AgentResult -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -from environments.tool_context import ToolContext - -logger = logging.getLogger(__name__) - - -# ============================================================================= -# Inline task definitions -- no external dataset needed -# ============================================================================= - -TRAIN_TASKS = [ - { - "prompt": "Create a file at ~/greeting.txt containing exactly the text: Hello from Hermes Agent", - "verify_path": "~/greeting.txt", - "expected_content": "Hello from Hermes Agent", - }, - { - "prompt": "Create a file at ~/count.txt containing the numbers 1 through 5, one per line", - "verify_path": "~/count.txt", - "expected_content": "1\n2\n3\n4\n5", - }, - { - "prompt": "Create a file at ~/answer.txt containing the result of 123 + 456", - "verify_path": "~/answer.txt", - "expected_content": "579", - }, -] - -EVAL_TASKS = [ - { - "prompt": "Create a file at ~/result.txt containing the result of 6 * 7", - "verify_path": "~/result.txt", - 
"expected_content": "42", - }, -] - - -class TerminalTestEnvConfig(HermesAgentEnvConfig): - """Config with defaults suitable for terminal testing.""" - - pass # Inherits all fields, overrides defaults in config_init - - -class TerminalTestEnv(HermesAgentBaseEnv): - """ - Simple test environment with inline file-creation tasks. - - All tasks follow the same pattern: "create a file at ~/X.txt with content Y". - The verifier runs `cat ~/X.txt` in the rollout's terminal and checks the output - against the expected string. Same verifier logic for all tasks. - - This environment is designed to validate the full stack end-to-end: - - Agent loop executes tool calls (terminal/file) - - ToolContext provides terminal access to the reward function - - Reward function verifies file content via cat - - Scored data flows through the Atropos pipeline - """ - - name = "terminal-test" - env_config_cls = TerminalTestEnvConfig - - @classmethod - def config_init(cls) -> Tuple[TerminalTestEnvConfig, List[APIServerConfig]]: - """ - Default configuration for the terminal test environment. - - Uses Modal terminal backend for cloud isolation and OpenRouter with - Claude for inference. API keys loaded from ~/hermes-agent/.env. - """ - env_config = TerminalTestEnvConfig( - # Terminal + file tools only - enabled_toolsets=["terminal", "file"], - disabled_toolsets=None, - distribution=None, - # Agent settings - max_agent_turns=10, # Simple tasks, don't need many turns - max_token_length=16000, - agent_temperature=1.0, - system_prompt=( - "You are a helpful assistant with access to a terminal and file tools. " - "Complete the user's request by using the available tools. " - "Be precise and follow instructions exactly." 
- ), - # Modal terminal backend for cloud-isolated sandboxes per rollout - terminal_backend="modal", - # Atropos settings - group_size=3, # 3 rollouts per group - tokenizer_name="NousResearch/q-30b-t-h45-e1", - tool_call_parser="hermes", - steps_per_eval=3, # Eval after all 3 steps - total_steps=3, # 3 groups total (1 group per step) - use_wandb=True, - wandb_name="terminal-test", - ensure_scores_are_not_same=False, # Allow all-same scores for simple tasks - # No external dataset - dataset_name=None, - ) - - # OpenRouter with Claude -- API key loaded from .env (OPENROUTER_API_KEY) - server_configs = [ - APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name="anthropic/claude-opus-4.6", - server_type="openai", - api_key=os.getenv("OPENROUTER_API_KEY", ""), - health_check=False, # OpenRouter doesn't have a /health endpoint - ) - ] - - return env_config, server_configs - - async def setup(self): - """Initialize inline task lists.""" - self.train_tasks = list(TRAIN_TASKS) - self.eval_tasks = list(EVAL_TASKS) - self.iter = 0 - # Track reward stats for wandb logging - self.reward_buffer: List[float] = [] - - async def get_next_item(self) -> Dict[str, str]: - """Cycle through training tasks.""" - item = self.train_tasks[self.iter % len(self.train_tasks)] - self.iter += 1 - return item - - def format_prompt(self, item: Dict[str, str]) -> str: - """The prompt is directly in the task item.""" - return item["prompt"] - - async def compute_reward( - self, item: Dict[str, str], result: AgentResult, ctx: ToolContext - ) -> float: - """ - Verify by cat-ing the expected file path and checking content matches. - Same verifier for all tasks -- they all write a file at a known path. 
- - Scoring: - 1.0 = exact match - 0.5 = expected content is present but has extra stuff - 0.0 = file doesn't exist or content doesn't match - """ - verify_result = ctx.terminal(f"cat {item['verify_path']}") - - # File doesn't exist or can't be read - if verify_result["exit_code"] != 0: - self.reward_buffer.append(0.0) - return 0.0 - - actual = verify_result.get("output", "").strip() - expected = item["expected_content"].strip() - - # Exact match - if actual == expected: - self.reward_buffer.append(1.0) - return 1.0 - - # Partial credit: expected content is present but has extra stuff - if expected in actual: - self.reward_buffer.append(0.5) - return 0.5 - - self.reward_buffer.append(0.0) - return 0.0 - - async def evaluate(self, *args, **kwargs): - """ - Run eval tasks using the agent loop and verify results. - Logs accuracy metrics. - """ - start_time = time.time() - correct = 0 - total = len(self.eval_tasks) - samples = [] - - for eval_item in self.eval_tasks: - try: - # For eval, we do a simple single-turn completion (not full agent loop) - # to keep eval fast. The agent loop is tested via training. 
- completion = await self.server.chat_completion( - messages=[ - {"role": "system", "content": self.config.system_prompt or ""}, - {"role": "user", "content": eval_item["prompt"]}, - ], - n=1, - max_tokens=self.config.max_token_length, - temperature=0.0, - split="eval", - ) - - response_content = ( - completion.choices[0].message.content if completion.choices else "" - ) - - samples.append( - { - "prompt": eval_item["prompt"], - "response": response_content, - "expected": eval_item["expected_content"], - } - ) - - except Exception as e: - logger.error("Eval failed for item: %s", e) - samples.append( - { - "prompt": eval_item["prompt"], - "response": f"ERROR: {e}", - "expected": eval_item["expected_content"], - } - ) - - end_time = time.time() - - eval_metrics = { - "eval/num_samples": total, - } - - await self.evaluate_log( - metrics=eval_metrics, - samples=samples, - start_time=start_time, - end_time=end_time, - ) - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None): - """Log training metrics including reward stats and accuracy.""" - if wandb_metrics is None: - wandb_metrics = {} - - if self.reward_buffer: - total = len(self.reward_buffer) - correct = sum(1 for r in self.reward_buffer if r == 1.0) - partial = sum(1 for r in self.reward_buffer if r == 0.5) - - wandb_metrics["train/avg_reward"] = sum(self.reward_buffer) / total - wandb_metrics["train/accuracy"] = correct / total - wandb_metrics["train/partial_match_rate"] = partial / total - wandb_metrics["train/total_rollouts"] = total - self.reward_buffer = [] - - await super().wandb_log(wandb_metrics) - - -if __name__ == "__main__": - TerminalTestEnv.cli() diff --git a/environments/tool_call_parsers/__init__.py b/environments/tool_call_parsers/__init__.py deleted file mode 100644 index 8bff3f9d1f06..000000000000 --- a/environments/tool_call_parsers/__init__.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -Tool Call Parser Registry - -Client-side parsers that extract structured tool_calls from raw model 
output text. -Used in Phase 2 (VLLM server type) where ManagedServer's /generate endpoint returns -raw text without tool call parsing. - -Each parser is a standalone reimplementation of the corresponding VLLM parser's -non-streaming extract_tool_calls() logic. No VLLM dependency -- only standard library -(re, json, uuid) and openai types. - -Usage: - from environments.tool_call_parsers import get_parser - - parser = get_parser("hermes") - content, tool_calls = parser.parse(raw_model_output) - # content = text with tool call markup stripped - # tool_calls = list of ChatCompletionMessageToolCall objects, or None -""" - -import logging -from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Tuple, Type - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, -) - -logger = logging.getLogger(__name__) - -# Type alias for parser return value -ParseResult = Tuple[Optional[str], Optional[List[ChatCompletionMessageToolCall]]] - - -class ToolCallParser(ABC): - """ - Base class for tool call parsers. - - Each parser knows how to extract structured tool_calls from a specific - model family's raw output text format. - """ - - @abstractmethod - def parse(self, text: str) -> ParseResult: - """ - Parse raw model output text for tool calls. - - Args: - text: Raw decoded text from the model's completion - - Returns: - Tuple of (content, tool_calls) where: - - content: text with tool call markup stripped (the message 'content' field), - or None if the entire output was tool calls - - tool_calls: list of ChatCompletionMessageToolCall objects, - or None if no tool calls were found - """ - raise NotImplementedError - - -# Global parser registry: name -> parser class -PARSER_REGISTRY: Dict[str, Type[ToolCallParser]] = {} - - -def register_parser(name: str): - """ - Decorator to register a parser class under a given name. - - Usage: - @register_parser("hermes") - class HermesToolCallParser(ToolCallParser): - ... 
- """ - - def decorator(cls: Type[ToolCallParser]) -> Type[ToolCallParser]: - PARSER_REGISTRY[name] = cls - return cls - - return decorator - - -def get_parser(name: str) -> ToolCallParser: - """ - Get a parser instance by name. - - Args: - name: Parser name (e.g., "hermes", "mistral", "llama3_json") - - Returns: - Instantiated parser - - Raises: - KeyError: If parser name is not found in registry - """ - if name not in PARSER_REGISTRY: - available = sorted(PARSER_REGISTRY.keys()) - raise KeyError( - f"Tool call parser '{name}' not found. Available parsers: {available}" - ) - return PARSER_REGISTRY[name]() - - -def list_parsers() -> List[str]: - """Return sorted list of registered parser names.""" - return sorted(PARSER_REGISTRY.keys()) - - -# Import all parser modules to trigger registration via @register_parser decorators -# Each module registers itself when imported -from environments.tool_call_parsers.hermes_parser import HermesToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.longcat_parser import LongcatToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.mistral_parser import MistralToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.llama_parser import LlamaToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.qwen_parser import QwenToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.deepseek_v3_parser import DeepSeekV3ToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.deepseek_v3_1_parser import DeepSeekV31ToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.kimi_k2_parser import KimiK2ToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.glm45_parser import Glm45ToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.glm47_parser import Glm47ToolCallParser # noqa: E402, F401 -from environments.tool_call_parsers.qwen3_coder_parser import Qwen3CoderToolCallParser # noqa: E402, F401 diff --git 
a/environments/tool_call_parsers/deepseek_v3_1_parser.py b/environments/tool_call_parsers/deepseek_v3_1_parser.py deleted file mode 100644 index 8456990c6ad1..000000000000 --- a/environments/tool_call_parsers/deepseek_v3_1_parser.py +++ /dev/null @@ -1,72 +0,0 @@ -""" -DeepSeek V3.1 tool call parser. - -Similar to V3 but with a slightly different format: - <๏ฝœtoolโ–callโ–begin๏ฝœ>function_name<๏ฝœtoolโ–sep๏ฝœ>arguments<๏ฝœtoolโ–callโ–end๏ฝœ> - -Note: V3 has type+name before the separator, V3.1 has name before and args after. - -Based on VLLM's DeepSeekV31ToolParser.extract_tool_calls() -""" - -import re -import uuid -from typing import List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -@register_parser("deepseek_v3_1") -@register_parser("deepseek_v31") -class DeepSeekV31ToolCallParser(ToolCallParser): - """ - Parser for DeepSeek V3.1 tool calls. - - Slightly different regex than V3: function_name comes before the separator, - arguments come after (no type field, no json code block wrapper). 
- """ - - START_TOKEN = "<๏ฝœtoolโ–callsโ–begin๏ฝœ>" - - # Regex captures: function_name, function_arguments - PATTERN = re.compile( - r"<๏ฝœtoolโ–callโ–begin๏ฝœ>(?P.*?)<๏ฝœtoolโ–sep๏ฝœ>(?P.*?)<๏ฝœtoolโ–callโ–end๏ฝœ>", - re.DOTALL, - ) - - def parse(self, text: str) -> ParseResult: - if self.START_TOKEN not in text: - return text, None - - try: - matches = self.PATTERN.findall(text) - if not matches: - return text, None - - tool_calls: List[ChatCompletionMessageToolCall] = [] - for match in matches: - func_name, func_args = match - tool_calls.append( - ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=func_name.strip(), - arguments=func_args.strip(), - ), - ) - ) - - if not tool_calls: - return text, None - - content = text[: text.find(self.START_TOKEN)].strip() - return content if content else None, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/deepseek_v3_parser.py b/environments/tool_call_parsers/deepseek_v3_parser.py deleted file mode 100644 index 61d23d5feccb..000000000000 --- a/environments/tool_call_parsers/deepseek_v3_parser.py +++ /dev/null @@ -1,89 +0,0 @@ -""" -DeepSeek V3 tool call parser. - -Format uses special unicode tokens: - <๏ฝœtoolโ–callsโ–begin๏ฝœ> - <๏ฝœtoolโ–callโ–begin๏ฝœ>type<๏ฝœtoolโ–sep๏ฝœ>function_name - ```json - {"arg": "value"} - ``` - <๏ฝœtoolโ–callโ–end๏ฝœ> - <๏ฝœtoolโ–callsโ–end๏ฝœ> - -Fixes Issue #989: Support for multiple simultaneous tool calls. -""" - -import re -import uuid -import logging -from typing import List, Optional, Tuple - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - -logger = logging.getLogger(__name__) - -@register_parser("deepseek_v3") -class DeepSeekV3ToolCallParser(ToolCallParser): - """ - Parser for DeepSeek V3 tool calls. 
- - Uses special unicode tokens with fullwidth angle brackets and block elements. - Extracts type, function name, and JSON arguments from the structured format. - Ensures all tool calls are captured when the model executes multiple actions. - """ - - START_TOKEN = "<๏ฝœtoolโ–callsโ–begin๏ฝœ>" - - # Updated PATTERN: Using \s* instead of literal \n for increased robustness - # against variations in model formatting (Issue #989). - PATTERN = re.compile( - r"<๏ฝœtoolโ–callโ–begin๏ฝœ>(?P.*?)<๏ฝœtoolโ–sep๏ฝœ>(?P.*?)\s*```json\s*(?P.*?)\s*```\s*<๏ฝœtoolโ–callโ–end๏ฝœ>", - re.DOTALL, - ) - - def parse(self, text: str) -> ParseResult: - """ - Parses the input text and extracts all available tool calls. - """ - if self.START_TOKEN not in text: - return text, None - - try: - # Using finditer to capture ALL tool calls in the sequence - matches = list(self.PATTERN.finditer(text)) - if not matches: - return text, None - - tool_calls: List[ChatCompletionMessageToolCall] = [] - - for match in matches: - func_name = match.group("function_name").strip() - func_args = match.group("function_arguments").strip() - - tool_calls.append( - ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=func_name, - arguments=func_args, - ), - ) - ) - - if tool_calls: - # Content is text before the first tool call block - content_index = text.find(self.START_TOKEN) - content = text[:content_index].strip() - return content if content else None, tool_calls - - return text, None - - except Exception as e: - logger.error(f"Error parsing DeepSeek V3 tool calls: {e}") - return text, None diff --git a/environments/tool_call_parsers/glm45_parser.py b/environments/tool_call_parsers/glm45_parser.py deleted file mode 100644 index e92e29881f1d..000000000000 --- a/environments/tool_call_parsers/glm45_parser.py +++ /dev/null @@ -1,109 +0,0 @@ -""" -GLM 4.5 (GLM-4-MoE) tool call parser. 
- -Format uses custom arg_key/arg_value tags rather than standard JSON: - function_name - param1value1 - param2value2 - - -Values are deserialized using json.loads -> ast.literal_eval -> raw string fallback. - -Based on VLLM's Glm4MoeModelToolParser.extract_tool_calls() -""" - -import ast -import json -import re -import uuid -from typing import Any, Dict, List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -def _deserialize_value(value: str) -> Any: - """ - Try to deserialize a string value to its native Python type. - Attempts json.loads, then ast.literal_eval, then returns raw string. - """ - try: - return json.loads(value) - except (json.JSONDecodeError, TypeError): - pass - - try: - return ast.literal_eval(value) - except (ValueError, SyntaxError, TypeError): - pass - - return value - - -@register_parser("glm45") -class Glm45ToolCallParser(ToolCallParser): - """ - Parser for GLM 4.5 (GLM-4-MoE) tool calls. - - Uses ... tags with / pairs - instead of standard JSON arguments. 
- """ - - FUNC_CALL_REGEX = re.compile(r".*?", re.DOTALL) - FUNC_DETAIL_REGEX = re.compile(r"([^\n]*)\n(.*)", re.DOTALL) - FUNC_ARG_REGEX = re.compile( - r"(.*?)\s*(.*?)", re.DOTALL - ) - - START_TOKEN = "" - - def parse(self, text: str) -> ParseResult: - if self.START_TOKEN not in text: - return text, None - - try: - matched_calls = self.FUNC_CALL_REGEX.findall(text) - if not matched_calls: - return text, None - - tool_calls: List[ChatCompletionMessageToolCall] = [] - - for match in matched_calls: - detail = self.FUNC_DETAIL_REGEX.search(match) - if not detail: - continue - - func_name = detail.group(1).strip() - func_args_raw = detail.group(2) - - # Parse arg_key/arg_value pairs - pairs = self.FUNC_ARG_REGEX.findall(func_args_raw) if func_args_raw else [] - arg_dict: Dict[str, Any] = {} - for key, value in pairs: - arg_key = key.strip() - arg_val = _deserialize_value(value.strip()) - arg_dict[arg_key] = arg_val - - tool_calls.append( - ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=func_name, - arguments=json.dumps(arg_dict, ensure_ascii=False), - ), - ) - ) - - if not tool_calls: - return text, None - - content = text[: text.find(self.START_TOKEN)].strip() - return content if content else None, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/glm47_parser.py b/environments/tool_call_parsers/glm47_parser.py deleted file mode 100644 index 6631cf842ce7..000000000000 --- a/environments/tool_call_parsers/glm47_parser.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -GLM 4.7 tool call parser. - -Same as GLM 4.5 but with slightly different regex patterns. -The tool_call tags may wrap differently and arg parsing handles -newlines between key/value pairs. - -Based on VLLM's Glm47MoeModelToolParser (extends Glm4MoeModelToolParser). 
-""" - -import re - -from environments.tool_call_parsers import ParseResult, register_parser -from environments.tool_call_parsers.glm45_parser import Glm45ToolCallParser - - -@register_parser("glm47") -class Glm47ToolCallParser(Glm45ToolCallParser): - """ - Parser for GLM 4.7 tool calls. - Extends GLM 4.5 with updated regex patterns. - """ - - def __init__(self): - super().__init__() - # GLM 4.7 uses a slightly different detail regex that includes - # the wrapper and optional arg_key content - self.FUNC_DETAIL_REGEX = re.compile( - r"(.*?)(.*?)?", re.DOTALL - ) - # GLM 4.7 handles newlines between arg_key and arg_value tags - self.FUNC_ARG_REGEX = re.compile( - r"(.*?)(?:\\n|\s)*(.*?)", - re.DOTALL, - ) diff --git a/environments/tool_call_parsers/hermes_parser.py b/environments/tool_call_parsers/hermes_parser.py deleted file mode 100644 index c6f911db04a5..000000000000 --- a/environments/tool_call_parsers/hermes_parser.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Hermes tool call parser. - -Format: {"name": "func", "arguments": {...}} -Based on VLLM's Hermes2ProToolParser.extract_tool_calls() -""" - -import json -import re -import uuid -from typing import List, Optional, Tuple - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -@register_parser("hermes") -class HermesToolCallParser(ToolCallParser): - """ - Parser for Hermes-format tool calls. - - Matches ... tags containing JSON with "name" and "arguments". - Also handles unclosed at end-of-string (truncated generation). 
- """ - - # Matches both closed and unclosed tool_call tags - PATTERN = re.compile( - r"\s*(.*?)\s*|\s*(.*)", re.DOTALL - ) - - def parse(self, text: str) -> ParseResult: - if "" not in text: - return text, None - - try: - matches = self.PATTERN.findall(text) - if not matches: - return text, None - - tool_calls: List[ChatCompletionMessageToolCall] = [] - for match in matches: - # match is a tuple: (closed_content, unclosed_content) - raw_json = match[0] if match[0] else match[1] - if not raw_json.strip(): - continue - - tc_data = json.loads(raw_json) - if "name" not in tc_data: - continue - tool_calls.append( - ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=tc_data["name"], - arguments=json.dumps( - tc_data.get("arguments", {}), ensure_ascii=False - ), - ), - ) - ) - - if not tool_calls: - return text, None - - # Content is everything before the first tag - content = text[: text.find("")].strip() - return content if content else None, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/kimi_k2_parser.py b/environments/tool_call_parsers/kimi_k2_parser.py deleted file mode 100644 index 29f40fc24356..000000000000 --- a/environments/tool_call_parsers/kimi_k2_parser.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -Kimi K2 tool call parser. - -Format: - <|tool_calls_section_begin|> - <|tool_call_begin|>function_id:0<|tool_call_argument_begin|>{"arg": "val"}<|tool_call_end|> - <|tool_calls_section_end|> - -The function_id format is typically "functions.func_name:index" or "func_name:index". 
- -Based on VLLM's KimiK2ToolParser.extract_tool_calls() -""" - -import re -import uuid -from typing import List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -@register_parser("kimi_k2") -class KimiK2ToolCallParser(ToolCallParser): - """ - Parser for Kimi K2 tool calls. - - Uses section begin/end tokens wrapping individual tool call begin/end tokens. - The tool_call_id contains the function name (after last dot, before colon). - """ - - # Support both singular and plural variants - START_TOKENS = [ - "<|tool_calls_section_begin|>", - "<|tool_call_section_begin|>", - ] - - # Regex captures: tool_call_id (e.g., "functions.get_weather:0"), function_arguments - PATTERN = re.compile( - r"<\|tool_call_begin\|>\s*(?P[^<]+:\d+)\s*" - r"<\|tool_call_argument_begin\|>\s*" - r"(?P(?:(?!<\|tool_call_begin\|>).)*?)\s*" - r"<\|tool_call_end\|>", - re.DOTALL, - ) - - def parse(self, text: str) -> ParseResult: - # Check for any variant of the start token - has_start = any(token in text for token in self.START_TOKENS) - if not has_start: - return text, None - - try: - matches = self.PATTERN.findall(text) - if not matches: - return text, None - - tool_calls: List[ChatCompletionMessageToolCall] = [] - for match in matches: - function_id, function_args = match - - # Extract function name from ID format: "functions.get_weather:0" -> "get_weather" - function_name = function_id.split(":")[0].split(".")[-1] - - tool_calls.append( - ChatCompletionMessageToolCall( - id=function_id, # Preserve the original ID format - type="function", - function=Function( - name=function_name, - arguments=function_args.strip(), - ), - ) - ) - - if not tool_calls: - return text, None - - # Content is everything before the tool calls section - earliest_start = len(text) - for token in self.START_TOKENS: - idx = text.find(token) - if idx 
>= 0 and idx < earliest_start: - earliest_start = idx - - content = text[:earliest_start].strip() - return content if content else None, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/llama_parser.py b/environments/tool_call_parsers/llama_parser.py deleted file mode 100644 index 8eb2136a11a8..000000000000 --- a/environments/tool_call_parsers/llama_parser.py +++ /dev/null @@ -1,96 +0,0 @@ -""" -Llama 3.x / 4 tool call parser. - -Format: The model outputs JSON objects with "name" and "arguments" (or "parameters") keys. -May be preceded by <|python_tag|> token. Supports multiple JSON objects separated -by content or semicolons. - -Based on VLLM's Llama3JsonToolParser.extract_tool_calls() -""" - -import json -import re -import uuid -from typing import List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -@register_parser("llama3_json") -@register_parser("llama4_json") -class LlamaToolCallParser(ToolCallParser): - """ - Parser for Llama 3.x and 4 JSON-format tool calls. - - Finds JSON objects containing "name" + ("arguments" or "parameters") keys. - Uses Python's json.JSONDecoder.raw_decode for robust extraction of - JSON objects from mixed text. 
- """ - - BOT_TOKEN = "<|python_tag|>" - - # Regex to find the start of potential JSON objects - JSON_START = re.compile(r"\{") - - def parse(self, text: str) -> ParseResult: - # Quick check: need either the bot token or a JSON brace - if self.BOT_TOKEN not in text and "{" not in text: - return text, None - - try: - decoder = json.JSONDecoder() - tool_calls: List[ChatCompletionMessageToolCall] = [] - end_index = -1 # Track where the last parsed JSON ended - - for match in self.JSON_START.finditer(text): - start = match.start() - # Skip if this brace is inside a previously parsed JSON object - if start <= end_index: - continue - - try: - obj, json_end = decoder.raw_decode(text[start:]) - end_index = start + json_end - - # Must have "name" and either "arguments" or "parameters" - name = obj.get("name") - args = obj.get("arguments", obj.get("parameters")) - - if not name or args is None: - continue - - # Normalize arguments to JSON string - if isinstance(args, dict): - args = json.dumps(args, ensure_ascii=False) - elif not isinstance(args, str): - args = json.dumps(args, ensure_ascii=False) - - tool_calls.append( - ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function(name=name, arguments=args), - ) - ) - except (json.JSONDecodeError, KeyError, ValueError): - continue - - if not tool_calls: - return text, None - - # Content is everything before the first tool call JSON - # Find where the first tool call starts in the text - first_tc_start = text.find("{") - if self.BOT_TOKEN in text: - first_tc_start = text.find(self.BOT_TOKEN) - content = text[:first_tc_start].strip() if first_tc_start > 0 else None - - return content, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/longcat_parser.py b/environments/tool_call_parsers/longcat_parser.py deleted file mode 100644 index afecdb862926..000000000000 --- a/environments/tool_call_parsers/longcat_parser.py +++ /dev/null @@ -1,69 
+0,0 @@ -""" -Longcat Flash Chat tool call parser. - -Same as Hermes but uses tags instead of . -Based on VLLM's LongcatFlashToolParser (extends Hermes2ProToolParser). -""" - -import json -import re -import uuid -from typing import List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -@register_parser("longcat") -class LongcatToolCallParser(ToolCallParser): - """ - Parser for Longcat Flash Chat tool calls. - Identical logic to Hermes, just different tag names. - """ - - PATTERN = re.compile( - r"\s*(.*?)\s*|\s*(.*)", - re.DOTALL, - ) - - def parse(self, text: str) -> ParseResult: - if "" not in text: - return text, None - - try: - matches = self.PATTERN.findall(text) - if not matches: - return text, None - - tool_calls: List[ChatCompletionMessageToolCall] = [] - for match in matches: - raw_json = match[0] if match[0] else match[1] - if not raw_json.strip(): - continue - - tc_data = json.loads(raw_json) - tool_calls.append( - ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:8]}", - type="function", - function=Function( - name=tc_data["name"], - arguments=json.dumps( - tc_data.get("arguments", {}), ensure_ascii=False - ), - ), - ) - ) - - if not tool_calls: - return text, None - - content = text[: text.find("")].strip() - return content if content else None, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/mistral_parser.py b/environments/tool_call_parsers/mistral_parser.py deleted file mode 100644 index a23684e87391..000000000000 --- a/environments/tool_call_parsers/mistral_parser.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -Mistral tool call parser. - -Supports two formats depending on tokenizer version: -- Pre-v11: content[TOOL_CALLS] [{"name": ..., "arguments": {...}}, ...] 
-- v11+: content[TOOL_CALLS]tool_name1{"arg": "val"}[TOOL_CALLS]tool_name2{"arg": "val"} - -Based on VLLM's MistralToolParser.extract_tool_calls() -The [TOOL_CALLS] token is the bot_token used by Mistral models. -""" - -import json -import uuid -from typing import List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -def _generate_mistral_id() -> str: - """Mistral tool call IDs are 9-char alphanumeric strings.""" - import random - import string - - return "".join(random.choices(string.ascii_letters + string.digits, k=9)) - - -@register_parser("mistral") -class MistralToolCallParser(ToolCallParser): - """ - Parser for Mistral-format tool calls. - - Detects format by checking if the content after [TOOL_CALLS] starts with '[' - (pre-v11 JSON array) or with a tool name (v11+ format). - """ - - # The [TOOL_CALLS] token -- may appear as different strings depending on tokenizer - BOT_TOKEN = "[TOOL_CALLS]" - - def parse(self, text: str) -> ParseResult: - if self.BOT_TOKEN not in text: - return text, None - - try: - parts = text.split(self.BOT_TOKEN) - content = parts[0].strip() - raw_tool_calls = parts[1:] - - # Detect format: if the first raw part starts with '[', it's pre-v11 - first_raw = raw_tool_calls[0].strip() if raw_tool_calls else "" - is_pre_v11 = first_raw.startswith("[") or first_raw.startswith("{") - - tool_calls: List[ChatCompletionMessageToolCall] = [] - - if not is_pre_v11: - # v11+ format: [TOOL_CALLS]tool_name{args}[TOOL_CALLS]tool_name2{args2} - for raw in raw_tool_calls: - raw = raw.strip() - if not raw or "{" not in raw: - continue - - brace_idx = raw.find("{") - tool_name = raw[:brace_idx].strip() - args_str = raw[brace_idx:] - - # Validate and clean the JSON arguments - try: - parsed_args = json.loads(args_str) - args_str = json.dumps(parsed_args, ensure_ascii=False) - except 
json.JSONDecodeError: - pass # Keep raw if parsing fails - - tool_calls.append( - ChatCompletionMessageToolCall( - id=_generate_mistral_id(), - type="function", - function=Function(name=tool_name, arguments=args_str), - ) - ) - else: - # Pre-v11 format: [TOOL_CALLS] [{"name": ..., "arguments": {...}}] - try: - parsed = json.loads(first_raw) - if isinstance(parsed, dict): - parsed = [parsed] - - for tc in parsed: - if "name" not in tc: - continue - args = tc.get("arguments", {}) - if isinstance(args, dict): - args = json.dumps(args, ensure_ascii=False) - - tool_calls.append( - ChatCompletionMessageToolCall( - id=_generate_mistral_id(), - type="function", - function=Function( - name=tc["name"], arguments=args - ), - ) - ) - except json.JSONDecodeError: - # Fallback: extract JSON objects using raw_decode - decoder = json.JSONDecoder() - idx = 0 - while idx < len(first_raw): - try: - obj, end_idx = decoder.raw_decode(first_raw, idx) - if isinstance(obj, dict) and "name" in obj: - args = obj.get("arguments", {}) - if isinstance(args, dict): - args = json.dumps(args, ensure_ascii=False) - tool_calls.append( - ChatCompletionMessageToolCall( - id=_generate_mistral_id(), - type="function", - function=Function( - name=obj["name"], arguments=args - ), - ) - ) - idx = end_idx - except json.JSONDecodeError: - idx += 1 - - if not tool_calls: - return text, None - - return content if content else None, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/qwen3_coder_parser.py b/environments/tool_call_parsers/qwen3_coder_parser.py deleted file mode 100644 index 042e46f7bf9a..000000000000 --- a/environments/tool_call_parsers/qwen3_coder_parser.py +++ /dev/null @@ -1,163 +0,0 @@ -""" -Qwen3-Coder tool call parser. - -Format uses XML-style nested tags: - - - value - value2 - - - -Parameters are extracted from value tags and -type-converted using the schema if available, otherwise treated as strings. 
- -Based on VLLM's Qwen3CoderToolParser.extract_tool_calls() -""" - -import ast -import json -import re -import uuid -from typing import Any, Dict, List, Optional - -from openai.types.chat.chat_completion_message_tool_call import ( - ChatCompletionMessageToolCall, - Function, -) - -from environments.tool_call_parsers import ParseResult, ToolCallParser, register_parser - - -def _try_convert_value(value: str) -> Any: - """ - Try to convert a parameter value string to a native Python type. - Handles null, numbers, booleans, JSON objects/arrays, and falls back to string. - """ - stripped = value.strip() - - # Handle null - if stripped.lower() == "null": - return None - - # Try JSON first (handles objects, arrays, strings, numbers, booleans) - try: - return json.loads(stripped) - except (json.JSONDecodeError, TypeError): - pass - - # Try Python literal eval (handles tuples, etc.) - try: - return ast.literal_eval(stripped) - except (ValueError, SyntaxError, TypeError): - pass - - # Return as string - return stripped - - -@register_parser("qwen3_coder") -class Qwen3CoderToolCallParser(ToolCallParser): - """ - Parser for Qwen3-Coder XML-format tool calls. - - Uses nested XML tags: val - """ - - START_TOKEN = "" - FUNCTION_PREFIX = "(.*?)|(.*?)$", re.DOTALL - ) - - # Find function blocks within a tool_call - FUNCTION_REGEX = re.compile( - r"||(?=)|$)", - re.DOTALL, - ) - - def _parse_function_call(self, function_str: str) -> Optional[ChatCompletionMessageToolCall]: - """Parse a single ... 
block into a ToolCall.""" - try: - # Extract function name: everything before the first '>' - gt_idx = function_str.index(">") - func_name = function_str[:gt_idx].strip() - params_str = function_str[gt_idx + 1:] - - # Extract parameters - param_dict: Dict[str, Any] = {} - for match_text in self.PARAMETER_REGEX.findall(params_str): - if ">" not in match_text: - continue - eq_idx = match_text.index(">") - param_name = match_text[:eq_idx].strip() - param_value = match_text[eq_idx + 1:] - - # Clean up whitespace - if param_value.startswith("\n"): - param_value = param_value[1:] - if param_value.endswith("\n"): - param_value = param_value[:-1] - - param_dict[param_name] = _try_convert_value(param_value) - - return ChatCompletionMessageToolCall( - id=f"call_{uuid.uuid4().hex[:24]}", - type="function", - function=Function( - name=func_name, - arguments=json.dumps(param_dict, ensure_ascii=False), - ), - ) - except (ValueError, IndexError): - return None - - def parse(self, text: str) -> ParseResult: - if self.FUNCTION_PREFIX not in text: - return text, None - - try: - # Find all tool_call blocks - tc_matches = self.TOOL_CALL_REGEX.findall(text) - raw_blocks = [m[0] if m[0] else m[1] for m in tc_matches] - - # Fallback: if no tool_call tags, try the whole text - if not raw_blocks: - raw_blocks = [text] - - # Find function blocks within each tool_call - function_strs: List[str] = [] - for block in raw_blocks: - func_matches = self.FUNCTION_REGEX.findall(block) - function_strs.extend(m[0] if m[0] else m[1] for m in func_matches) - - if not function_strs: - return text, None - - # Parse each function call - tool_calls: List[ChatCompletionMessageToolCall] = [] - for func_str in function_strs: - tc = self._parse_function_call(func_str) - if tc is not None: - tool_calls.append(tc) - - if not tool_calls: - return text, None - - # Content before tool calls - first_tc = text.find(self.START_TOKEN) - if first_tc < 0: - first_tc = text.find(self.FUNCTION_PREFIX) - content = 
text[:first_tc].strip() if first_tc > 0 else None - - return content, tool_calls - - except Exception: - return text, None diff --git a/environments/tool_call_parsers/qwen_parser.py b/environments/tool_call_parsers/qwen_parser.py deleted file mode 100644 index 9c8a8141997d..000000000000 --- a/environments/tool_call_parsers/qwen_parser.py +++ /dev/null @@ -1,19 +0,0 @@ -""" -Qwen 2.5 tool call parser. - -Uses the same format as Hermes. -Registered as a separate parser name for clarity when using --tool-parser=qwen. -""" - -from environments.tool_call_parsers import register_parser -from environments.tool_call_parsers.hermes_parser import HermesToolCallParser - - -@register_parser("qwen") -class QwenToolCallParser(HermesToolCallParser): - """ - Parser for Qwen 2.5 tool calls. - Same {"name": ..., "arguments": ...} format as Hermes. - """ - - pass # Identical format -- inherits everything from Hermes diff --git a/environments/tool_context.py b/environments/tool_context.py deleted file mode 100644 index 9756dadaf7c5..000000000000 --- a/environments/tool_context.py +++ /dev/null @@ -1,473 +0,0 @@ -""" -ToolContext -- Unrestricted Tool Access for Reward Functions - -A per-rollout handle that gives reward/verification functions direct access to -ALL hermes-agent tools, scoped to the rollout's task_id. The same task_id means -the terminal/browser session is the SAME one the model used during its rollout -- -all state (files, processes, browser tabs) is preserved. - -The verifier author decides which tools to use. Nothing is hardcoded or gated. 
- -Example usage in a compute_reward(): - async def compute_reward(self, item, result, ctx): - # Run tests in the model's terminal sandbox - test = ctx.terminal("pytest -v") - if test["exit_code"] == 0: - return 1.0 - - # Check if a file was created - content = ctx.read_file("/workspace/solution.py") - if content.get("content"): - return 0.5 - - return 0.0 -""" - -import json -import logging -import os -from typing import Any, Dict, List, Optional - -import asyncio -import concurrent.futures - -from model_tools import handle_function_call -from tools.terminal_tool import cleanup_vm -from tools.browser_tool import cleanup_browser - -logger = logging.getLogger(__name__) - -# Thread pool for running sync tool calls that internally use asyncio.run() -_tool_executor = concurrent.futures.ThreadPoolExecutor(max_workers=4) - - -def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) -> str: - """ - Run a tool call in a thread pool executor so backends that use asyncio.run() - internally (modal, docker, daytona) get a clean event loop. - - If we're already in an async context, executes handle_function_call() in a - disposable worker thread and blocks for the result. - If not (e.g., called from sync code), runs directly. - """ - try: - loop = asyncio.get_running_loop() - # We're in an async context -- need to run in thread - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: - future = pool.submit( - handle_function_call, tool_name, arguments, task_id - ) - return future.result(timeout=300) - except RuntimeError: - # No running event loop -- safe to call directly - return handle_function_call(tool_name, arguments, task_id) - - -class ToolContext: - """ - Open-ended access to all hermes-agent tools for a specific rollout. - - Passed to compute_reward() so verifiers can use any tool they need: - terminal commands, file reads/writes, web searches, browser automation, etc. - All calls share the rollout's task_id for session isolation. 
- """ - - def __init__(self, task_id: str): - self.task_id = task_id - - # ------------------------------------------------------------------------- - # Terminal tools - # ------------------------------------------------------------------------- - - def terminal(self, command: str, timeout: int = 180) -> Dict[str, Any]: - """ - Run a command in the rollout's terminal session. - - Args: - command: Shell command to execute - timeout: Command timeout in seconds - - Returns: - Dict with 'exit_code' (int) and 'output' (str) - """ - import os - backend = os.getenv("TERMINAL_ENV", "local") - logger.debug("ToolContext.terminal [%s backend] task=%s: %s", backend, self.task_id[:8], command[:100]) - - # Run via thread helper so modal/docker/daytona backends' asyncio.run() doesn't deadlock - result = _run_tool_in_thread( - "terminal", - {"command": command, "timeout": timeout}, - self.task_id, - ) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"exit_code": -1, "output": result} - - # ------------------------------------------------------------------------- - # File tools - # ------------------------------------------------------------------------- - - def read_file(self, path: str) -> Dict[str, Any]: - """ - Read a file from the rollout's filesystem. - - Args: - path: File path to read - - Returns: - Dict with file content or error - """ - result = handle_function_call( - "read_file", {"path": path}, task_id=self.task_id - ) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - def write_file(self, path: str, content: str) -> Dict[str, Any]: - """ - Write a TEXT file in the rollout's filesystem. - - Uses a shell heredoc under the hood, so this is only safe for text content. - For binary files (images, compiled artifacts, etc.), use upload_file() instead. 
- - Args: - path: File path to write - content: Text content to write - - Returns: - Dict with success status or error - """ - result = handle_function_call( - "write_file", {"path": path, "content": content}, task_id=self.task_id - ) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - def upload_file(self, local_path: str, remote_path: str) -> Dict[str, Any]: - """ - Upload a local file to the rollout's sandbox (binary-safe). - - Unlike write_file() which passes content through a shell heredoc (text-only), - this method base64-encodes the file and decodes it inside the sandbox. - Safe for any file type: binaries, images, archives, etc. - - For large files (>1MB), the content is split into chunks to avoid - hitting shell command-length limits. - - Args: - local_path: Path to a local file on the host - remote_path: Destination path inside the sandbox - - Returns: - Dict with 'exit_code' and 'output' - """ - import base64 - from pathlib import Path as _Path - - local = _Path(local_path) - if not local.exists(): - return {"exit_code": -1, "output": f"Local file not found: {local_path}"} - - raw = local.read_bytes() - b64 = base64.b64encode(raw).decode("ascii") - - # Ensure parent directory exists in the sandbox - parent = str(_Path(remote_path).parent) - if parent not in {".", "/"}: - self.terminal(f"mkdir -p {parent}", timeout=10) - - # For small files, single command is fine - chunk_size = 60_000 # ~60KB per chunk (well within shell limits) - if len(b64) <= chunk_size: - result = self.terminal( - f"printf '%s' '{b64}' | base64 -d > {remote_path}", - timeout=30, - ) - else: - # For larger files, write base64 in chunks then decode - tmp_b64 = "/tmp/_hermes_upload.b64" - self.terminal(f": > {tmp_b64}", timeout=5) # truncate - for i in range(0, len(b64), chunk_size): - chunk = b64[i : i + chunk_size] - self.terminal(f"printf '%s' '{chunk}' >> {tmp_b64}", timeout=15) - result = self.terminal( - f"base64 -d {tmp_b64} > 
{remote_path} && rm -f {tmp_b64}", - timeout=30, - ) - - return result - - def upload_dir(self, local_dir: str, remote_dir: str) -> List[Dict[str, Any]]: - """ - Upload an entire local directory to the rollout's sandbox (binary-safe). - - Recursively uploads all files, preserving directory structure. - - Args: - local_dir: Path to a local directory on the host - remote_dir: Destination directory inside the sandbox - - Returns: - List of results, one per file uploaded - """ - from pathlib import Path as _Path - - local = _Path(local_dir) - if not local.exists() or not local.is_dir(): - return [{"exit_code": -1, "output": f"Local directory not found: {local_dir}"}] - - results = [] - for file_path in sorted(local.rglob("*")): - if file_path.is_file(): - relative = file_path.relative_to(local) - target = f"{remote_dir}/{relative}" - results.append(self.upload_file(str(file_path), target)) - return results - - def download_file(self, remote_path: str, local_path: str) -> Dict[str, Any]: - """ - Download a file from the rollout's sandbox to the host (binary-safe). - - The inverse of upload_file(). Base64-encodes the file inside the sandbox, - reads the encoded data through the terminal, and decodes it locally. - Safe for any file type. 
- - Args: - remote_path: Path to the file inside the sandbox - local_path: Destination path on the host - - Returns: - Dict with 'success' (bool) and 'bytes' (int) or 'error' (str) - """ - import base64 - from pathlib import Path as _Path - - # Base64-encode the file inside the sandbox and capture output - result = self.terminal( - f"base64 {remote_path} 2>/dev/null", - timeout=30, - ) - - if result.get("exit_code", -1) != 0: - return { - "success": False, - "error": f"Failed to read remote file: {result.get('output', '')}", - } - - b64_data = result.get("output", "").strip() - if not b64_data: - return {"success": False, "error": f"Remote file is empty or missing: {remote_path}"} - - try: - raw = base64.b64decode(b64_data) - except Exception as e: - return {"success": False, "error": f"Base64 decode failed: {e}"} - - # Write to local host filesystem - local = _Path(local_path) - local.parent.mkdir(parents=True, exist_ok=True) - local.write_bytes(raw) - - return {"success": True, "bytes": len(raw)} - - def download_dir(self, remote_dir: str, local_dir: str) -> List[Dict[str, Any]]: - """ - Download a directory from the rollout's sandbox to the host (binary-safe). - - Lists all files in the remote directory, then downloads each one. - Preserves directory structure. 
- - Args: - remote_dir: Path to the directory inside the sandbox - local_dir: Destination directory on the host - - Returns: - List of results, one per file downloaded - """ - from pathlib import Path as _Path - - # List files in the remote directory - ls_result = self.terminal( - f"find {remote_dir} -type f 2>/dev/null", - timeout=15, - ) - - if ls_result.get("exit_code", -1) != 0: - return [{"success": False, "error": f"Failed to list remote dir: {remote_dir}"}] - - file_list = ls_result.get("output", "").strip() - if not file_list: - return [{"success": False, "error": f"Remote directory is empty or missing: {remote_dir}"}] - - results = [] - for remote_file in file_list.splitlines(): - remote_file = remote_file.strip() - if not remote_file: - continue - # Compute the relative path to preserve directory structure - if remote_file.startswith(remote_dir): - relative = remote_file[len(remote_dir):].lstrip("/") - else: - relative = _Path(remote_file).name - local_file = str(_Path(local_dir) / relative) - results.append(self.download_file(remote_file, local_file)) - - return results - - def search(self, query: str, path: str = ".") -> Dict[str, Any]: - """ - Search for text in the rollout's filesystem. - - Args: - query: Search query - path: Directory to search in - - Returns: - Dict with search results - """ - result = handle_function_call( - "search_files", {"pattern": query, "path": path}, task_id=self.task_id - ) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - # ------------------------------------------------------------------------- - # Web tools - # ------------------------------------------------------------------------- - - def web_search(self, query: str) -> Dict[str, Any]: - """ - Search the web. 
- - Args: - query: Search query - - Returns: - Dict with search results - """ - result = handle_function_call("web_search", {"query": query}) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - def web_extract(self, urls: List[str]) -> Dict[str, Any]: - """ - Extract content from URLs. - - Args: - urls: List of URLs to extract content from - - Returns: - Dict with extracted content - """ - result = handle_function_call("web_extract", {"urls": urls}) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - # ------------------------------------------------------------------------- - # Browser tools - # ------------------------------------------------------------------------- - - def browser_navigate(self, url: str) -> Dict[str, Any]: - """ - Navigate the rollout's browser session to a URL. - - Args: - url: URL to navigate to - - Returns: - Dict with page snapshot or error - """ - result = handle_function_call( - "browser_navigate", {"url": url}, task_id=self.task_id - ) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - def browser_snapshot(self) -> Dict[str, Any]: - """ - Take a snapshot of the current browser page. - - Returns: - Dict with page content/accessibility snapshot - """ - result = handle_function_call( - "browser_snapshot", {}, task_id=self.task_id - ) - try: - return json.loads(result) - except json.JSONDecodeError: - return {"error": result} - - # ------------------------------------------------------------------------- - # Generic tool access - # ------------------------------------------------------------------------- - - def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> str: - """ - Call any hermes-agent tool by name. - - This is the generic escape hatch -- if a tool doesn't have a convenience - wrapper above, you can call it directly here. 
- - Args: - tool_name: Name of the tool (e.g., "vision_analyze", "skills_list") - arguments: Dict of arguments for the tool - - Returns: - Raw JSON string result from the tool - """ - return _run_tool_in_thread(tool_name, arguments, self.task_id) - - # ------------------------------------------------------------------------- - # Cleanup - # ------------------------------------------------------------------------- - - def cleanup(self): - """ - Release all resources (terminal VMs, browser sessions, background processes) - for this rollout. - - Called automatically by the base environment via try/finally after - compute_reward() completes. You generally don't need to call this yourself. - """ - # Kill any background processes from this rollout (safety net) - try: - from tools.process_registry import process_registry - killed = process_registry.kill_all(task_id=self.task_id) - if killed: - logger.debug("Process cleanup for task %s: killed %d process(es)", self.task_id, killed) - except Exception as e: - logger.debug("Process cleanup for task %s: %s", self.task_id, e) - - try: - cleanup_vm(self.task_id) - except Exception as e: - logger.debug("VM cleanup for task %s: %s", self.task_id, e) - - # Suppress browser_tool's noisy debug prints during cleanup. - # The cleanup still runs (safe), it just doesn't spam the console. 
- _prev_quiet = os.environ.get("HERMES_QUIET") - os.environ["HERMES_QUIET"] = "1" - try: - cleanup_browser(self.task_id) - except Exception as e: - logger.debug("Browser cleanup for task %s: %s", self.task_id, e) - finally: - if _prev_quiet is None: - os.environ.pop("HERMES_QUIET", None) - else: - os.environ["HERMES_QUIET"] = _prev_quiet diff --git a/environments/web_research_env.py b/environments/web_research_env.py deleted file mode 100644 index c637a7cbeaed..000000000000 --- a/environments/web_research_env.py +++ /dev/null @@ -1,719 +0,0 @@ -""" -WebResearchEnv โ€” RL Environment for Multi-Step Web Research -============================================================ - -Trains models to do accurate, efficient, multi-source web research. - -Reward signals: - - Answer correctness (LLM judge, 0.0โ€“1.0) - - Source diversity (used โ‰ฅ2 distinct domains) - - Efficiency (penalizes excessive tool calls) - - Tool usage (bonus for actually using web tools) - -Dataset: FRAMES benchmark (Google, 2024) โ€” multi-hop factual questions - HuggingFace: google/frames-benchmark - Fallback: built-in sample questions (no HF token needed) - -Usage: - # Phase 1 (OpenAI-compatible server) - python environments/web_research_env.py serve \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name YourModel \\ - --openai.server_type openai - - # Process mode (offline data generation) - python environments/web_research_env.py process \\ - --env.data_path_to_save_groups data/web_research.jsonl - - # Standalone eval - python environments/web_research_env.py evaluate \\ - --openai.base_url http://localhost:8000/v1 \\ - --openai.model_name YourModel - -Built by: github.com/jackx707 -Inspired by: GroceryMind โ€” production Hermes agent doing live web research - across German grocery stores (firecrawl + hermes-agent) -""" - -from __future__ import annotations - -import asyncio -import json -import logging -import os -import random -import re -import sys -from pathlib import Path 
-from typing import Any, Dict, List, Optional, Tuple -from urllib.parse import urlparse - -from pydantic import Field - -# Ensure hermes-agent root is on path -_repo_root = Path(__file__).resolve().parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -# --------------------------------------------------------------------------- -# Optional HuggingFace datasets import -# --------------------------------------------------------------------------- -try: - from datasets import load_dataset - HF_AVAILABLE = True -except ImportError: - HF_AVAILABLE = False - -from atroposlib.envs.base import ScoredDataGroup -from atroposlib.envs.server_handling.server_manager import APIServerConfig -from atroposlib.type_definitions import Item - -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -from environments.agent_loop import AgentResult -from environments.tool_context import ToolContext - -logger = logging.getLogger(__name__) - -# --------------------------------------------------------------------------- -# Fallback sample dataset (used when HuggingFace is unavailable) -# Multi-hop questions requiring real web search to answer. -# --------------------------------------------------------------------------- -SAMPLE_QUESTIONS = [ - { - "question": "What is the current population of the capital city of the country that won the 2022 FIFA World Cup?", - "answer": "Buenos Aires has approximately 3 million people in the city proper, or around 15 million in the greater metro area.", - "difficulty": "medium", - "hops": 2, - }, - { - "question": "Who is the CEO of the company that makes the most widely used open-source container orchestration platform?", - "answer": "The Linux Foundation oversees Kubernetes. 
CNCF (Cloud Native Computing Foundation) is the specific body โ€” it does not have a traditional CEO but has an executive director.", - "difficulty": "medium", - "hops": 2, - }, - { - "question": "What programming language was used to write the original version of the web framework used by Instagram?", - "answer": "Django, which Instagram was built on, is written in Python.", - "difficulty": "easy", - "hops": 2, - }, - { - "question": "In what year was the university founded where the inventor of the World Wide Web currently holds a professorship?", - "answer": "Tim Berners-Lee holds a professorship at MIT (founded 1861) and the University of Southampton (founded 1952).", - "difficulty": "hard", - "hops": 3, - }, - { - "question": "What is the latest stable version of the programming language that ranks #1 on the TIOBE index as of this year?", - "answer": "Python is currently #1 on TIOBE. The latest stable version should be verified via the official python.org site.", - "difficulty": "medium", - "hops": 2, - }, - { - "question": "How many employees does the parent company of Instagram have?", - "answer": "Meta Platforms (parent of Instagram) employs approximately 70,000+ people as of recent reports.", - "difficulty": "medium", - "hops": 2, - }, - { - "question": "What is the current interest rate set by the central bank of the country where the Eiffel Tower is located?", - "answer": "The European Central Bank sets rates for France/eurozone. The current rate should be verified โ€” it has changed frequently in 2023-2025.", - "difficulty": "hard", - "hops": 2, - }, - { - "question": "Which company acquired the startup founded by the creator of Oculus VR?", - "answer": "Palmer Luckey founded Oculus VR, which was acquired by Facebook (now Meta). 
He later founded Anduril Industries.", - "difficulty": "medium", - "hops": 2, - }, - { - "question": "What is the market cap of the company that owns the most popular search engine in Russia?", - "answer": "Yandex (now split into separate entities after 2024 restructuring). Current market cap should be verified via financial sources.", - "difficulty": "hard", - "hops": 2, - }, - { - "question": "What was the GDP growth rate of the country that hosted the most recent Summer Olympics?", - "answer": "Paris, France hosted the 2024 Summer Olympics. France's recent GDP growth should be verified via World Bank or IMF data.", - "difficulty": "hard", - "hops": 2, - }, -] - - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - -class WebResearchEnvConfig(HermesAgentEnvConfig): - """Configuration for the web research RL environment.""" - - # Reward weights - correctness_weight: float = Field( - default=0.6, - description="Weight for answer correctness in reward (LLM judge score).", - ) - tool_usage_weight: float = Field( - default=0.2, - description="Weight for tool usage signal (did the model actually use web tools?).", - ) - efficiency_weight: float = Field( - default=0.2, - description="Weight for efficiency signal (penalizes excessive tool calls).", - ) - diversity_bonus: float = Field( - default=0.1, - description="Bonus reward for citing โ‰ฅ2 distinct domains.", - ) - - # Efficiency thresholds - efficient_max_calls: int = Field( - default=5, - description="Maximum tool calls before efficiency penalty begins.", - ) - heavy_penalty_calls: int = Field( - default=10, - description="Tool call count where efficiency penalty steepens.", - ) - - # Eval - eval_size: int = Field( - default=20, - description="Number of held-out items for evaluation.", - ) - eval_split_ratio: float = Field( - default=0.1, - description="Fraction of dataset to hold out for 
evaluation (0.0โ€“1.0).", - ) - - # Dataset - dataset_name: str = Field( - default="google/frames-benchmark", - description="HuggingFace dataset name for research questions.", - ) - - -# --------------------------------------------------------------------------- -# Environment -# --------------------------------------------------------------------------- - -class WebResearchEnv(HermesAgentBaseEnv): - """ - RL environment for training multi-step web research skills. - - The model is given a factual question requiring 2-3 hops of web research - and must use web_search / web_extract tools to find and synthesize the answer. - - Reward is multi-signal: - 60% โ€” answer correctness (LLM judge) - 20% โ€” tool usage (did the model actually search the web?) - 20% โ€” efficiency (penalizes >5 tool calls) - - Bonus +0.1 for source diversity (โ‰ฅ2 distinct domains cited). - """ - - name = "web-research" - env_config_cls = WebResearchEnvConfig - - # Default toolsets for this environment โ€” web + file for saving notes - default_toolsets = ["web", "file"] - - @classmethod - def config_init(cls) -> Tuple[WebResearchEnvConfig, List[APIServerConfig]]: - """Default configuration for the web research environment.""" - env_config = WebResearchEnvConfig( - enabled_toolsets=["web", "file"], - max_agent_turns=15, - agent_temperature=1.0, - system_prompt=( - "You are a highly capable research agent. When asked a factual question, " - "always use web_search to find current, accurate information before answering. " - "Cite at least 2 sources. Be concise and accurate." 
- ), - group_size=4, - total_steps=1000, - steps_per_eval=100, - use_wandb=True, - wandb_name="web-research", - ) - - server_configs = [ - APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name="anthropic/claude-sonnet-4.5", - server_type="openai", - api_key=os.getenv("OPENROUTER_API_KEY", ""), - health_check=False, - ) - ] - - return env_config, server_configs - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._items: list[dict] = [] - self._eval_items: list[dict] = [] - self._index: int = 0 - - # Metrics tracking for wandb - self._reward_buffer: list[float] = [] - self._correctness_buffer: list[float] = [] - self._tool_usage_buffer: list[float] = [] - self._efficiency_buffer: list[float] = [] - self._diversity_buffer: list[float] = [] - - # ------------------------------------------------------------------ - # 1. Setup โ€” load dataset - # ------------------------------------------------------------------ - - async def setup(self) -> None: - """Load the FRAMES benchmark or fall back to built-in samples.""" - if HF_AVAILABLE: - try: - logger.info("Loading FRAMES benchmark from HuggingFace...") - ds = load_dataset(self.config.dataset_name, split="test") - self._items = [ - { - "question": row["Prompt"], - "answer": row["Answer"], - "difficulty": row.get("reasoning_types", "unknown"), - "hops": 2, - } - for row in ds - ] - # Hold out for eval - eval_size = max( - self.config.eval_size, - int(len(self._items) * self.config.eval_split_ratio), - ) - random.shuffle(self._items) - self._eval_items = self._items[:eval_size] - self._items = self._items[eval_size:] - logger.info( - f"Loaded {len(self._items)} train / {len(self._eval_items)} eval items " - f"from FRAMES benchmark." - ) - return - except Exception as e: - logger.warning(f"Could not load FRAMES from HuggingFace: {e}. 
Using built-in samples.") - - # Fallback - random.shuffle(SAMPLE_QUESTIONS) - split = max(1, len(SAMPLE_QUESTIONS) * 8 // 10) - self._items = SAMPLE_QUESTIONS[:split] - self._eval_items = SAMPLE_QUESTIONS[split:] - logger.info( - f"Using built-in sample dataset: {len(self._items)} train / " - f"{len(self._eval_items)} eval items." - ) - - # ------------------------------------------------------------------ - # 2. get_next_item โ€” return the next question - # ------------------------------------------------------------------ - - async def get_next_item(self) -> dict: - """Return the next item, cycling through the dataset.""" - if not self._items: - raise RuntimeError("Dataset is empty. Did you call setup()?") - item = self._items[self._index % len(self._items)] - self._index += 1 - return item - - # ------------------------------------------------------------------ - # 3. format_prompt โ€” build the user-facing prompt - # ------------------------------------------------------------------ - - def format_prompt(self, item: dict) -> str: - """Format the research question as a task prompt.""" - return ( - f"Research the following question thoroughly using web search. " - f"You MUST search the web to find current, accurate information โ€” " - f"do not rely solely on your training data.\n\n" - f"Question: {item['question']}\n\n" - f"Requirements:\n" - f"- Use web_search and/or web_extract tools to find information\n" - f"- Search at least 2 different sources\n" - f"- Provide a concise, accurate answer (2-4 sentences)\n" - f"- Cite the sources you used" - ) - - # ------------------------------------------------------------------ - # 4. 
compute_reward โ€” multi-signal scoring - # ------------------------------------------------------------------ - - async def compute_reward( - self, - item: dict, - result: AgentResult, - ctx: ToolContext, - ) -> float: - """ - Multi-signal reward function: - - correctness_weight * correctness โ€” LLM judge comparing answer to ground truth - tool_usage_weight * tool_used โ€” binary: did the model use web tools? - efficiency_weight * efficiency โ€” penalizes wasteful tool usage - + diversity_bonus โ€” source diversity (โ‰ฅ2 distinct domains) - """ - # Extract final response from messages (last assistant message with content) - final_response = "" - tools_used: list[str] = [] - for msg in reversed(result.messages): - if msg.get("role") == "assistant" and msg.get("content") and not final_response: - final_response = msg["content"] - # Collect tool names from tool call messages - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - fn = tc.get("function", {}) if isinstance(tc, dict) else {} - name = fn.get("name", "") - if name: - tools_used.append(name) - tool_call_count: int = result.turns_used or len(tools_used) - - cfg = self.config - - # ---- Signal 1: Answer correctness (LLM judge) ---------------- - correctness = await self._llm_judge( - question=item["question"], - expected=item["answer"], - model_answer=final_response, - ) - - # ---- Signal 2: Web tool usage -------------------------------- - web_tools = {"web_search", "web_extract", "search", "firecrawl"} - tool_used = 1.0 if any(t in web_tools for t in tools_used) else 0.0 - - # ---- Signal 3: Efficiency ------------------------------------ - if tool_call_count <= cfg.efficient_max_calls: - efficiency = 1.0 - elif tool_call_count <= cfg.heavy_penalty_calls: - efficiency = 1.0 - (tool_call_count - cfg.efficient_max_calls) * 0.08 - else: - efficiency = max(0.0, 1.0 - (tool_call_count - cfg.efficient_max_calls) * 0.12) - - # ---- Bonus: Source diversity 
--------------------------------- - domains = self._extract_domains(final_response) - diversity = cfg.diversity_bonus if len(domains) >= 2 else 0.0 - - # ---- Combine ------------------------------------------------ - reward = ( - cfg.correctness_weight * correctness - + cfg.tool_usage_weight * tool_used - + cfg.efficiency_weight * efficiency - + diversity - ) - reward = min(1.0, max(0.0, reward)) # clamp to [0, 1] - - # Track for wandb - self._reward_buffer.append(reward) - self._correctness_buffer.append(correctness) - self._tool_usage_buffer.append(tool_used) - self._efficiency_buffer.append(efficiency) - self._diversity_buffer.append(diversity) - - logger.debug( - f"Reward breakdown โ€” correctness={correctness:.2f}, " - f"tool_used={tool_used:.1f}, efficiency={efficiency:.2f}, " - f"diversity={diversity:.1f} โ†’ total={reward:.3f}" - ) - - return reward - - # ------------------------------------------------------------------ - # 5. evaluate โ€” run on held-out eval split - # ------------------------------------------------------------------ - - async def evaluate(self, *args, **kwargs) -> None: - """Run evaluation on the held-out split using the full agent loop with tools. - - Each eval item runs through the same agent loop as training โ€” - the model can use web_search, web_extract, etc. to research answers. - This measures actual agentic research capability, not just knowledge. 
- """ - import time - import uuid - from environments.agent_loop import HermesAgentLoop - from environments.tool_context import ToolContext - - items = self._eval_items - if not items: - logger.warning("No eval items available.") - return - - eval_size = min(self.config.eval_size, len(items)) - eval_items = items[:eval_size] - - logger.info(f"Running eval on {len(eval_items)} questions (with agent loop + tools)...") - start_time = time.time() - samples = [] - - # Resolve tools once for all eval items - tools, valid_names = self._resolve_tools_for_group() - - for i, item in enumerate(eval_items): - task_id = str(uuid.uuid4()) - logger.info(f"Eval [{i+1}/{len(eval_items)}]: {item['question'][:80]}...") - - try: - # Build messages - messages: List[Dict[str, Any]] = [] - if self.config.system_prompt: - messages.append({"role": "system", "content": self.config.system_prompt}) - messages.append({"role": "user", "content": self.format_prompt(item)}) - - # Run the full agent loop with tools - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=0.0, # Deterministic for eval - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - budget_config=self.config.build_budget_config(), - ) - result = await agent.run(messages) - - # Extract final response and tool usage from messages - final_response = "" - tool_call_count = 0 - for msg in reversed(result.messages): - if msg.get("role") == "assistant" and msg.get("content") and not final_response: - final_response = msg["content"] - if msg.get("role") == "assistant" and msg.get("tool_calls"): - tool_call_count += len(msg["tool_calls"]) - - # Compute reward (includes LLM judge for correctness) - # Temporarily save buffer lengths so we can extract the - # correctness score without calling judge twice, and avoid - # polluting training metric buffers with eval data. 
- buf_len = len(self._correctness_buffer) - ctx = ToolContext(task_id) - try: - reward = await self.compute_reward(item, result, ctx) - finally: - ctx.cleanup() - - # Extract correctness from the buffer (compute_reward appended it) - # then remove eval entries from training buffers - correctness = ( - self._correctness_buffer[buf_len] - if len(self._correctness_buffer) > buf_len - else 0.0 - ) - # Roll back buffers to avoid polluting training metrics - for buf in ( - self._reward_buffer, self._correctness_buffer, - self._tool_usage_buffer, self._efficiency_buffer, - self._diversity_buffer, - ): - if len(buf) > buf_len: - buf.pop() - - samples.append({ - "prompt": item["question"], - "response": final_response[:500], - "expected": item["answer"], - "correctness": correctness, - "reward": reward, - "tool_calls": tool_call_count, - "turns": result.turns_used, - }) - - logger.info( - f" โ†’ correctness={correctness:.2f}, reward={reward:.3f}, " - f"tools={tool_call_count}, turns={result.turns_used}" - ) - - except Exception as e: - logger.error(f"Eval error on item: {e}") - samples.append({ - "prompt": item["question"], - "response": f"ERROR: {e}", - "expected": item["answer"], - "correctness": 0.0, - "reward": 0.0, - "tool_calls": 0, - "turns": 0, - }) - - end_time = time.time() - - # Compute aggregate metrics - correctness_scores = [s["correctness"] for s in samples] - rewards = [s["reward"] for s in samples] - tool_counts = [s["tool_calls"] for s in samples] - n = len(samples) - - eval_metrics = { - "eval/mean_correctness": sum(correctness_scores) / n if n else 0.0, - "eval/mean_reward": sum(rewards) / n if n else 0.0, - "eval/mean_tool_calls": sum(tool_counts) / n if n else 0.0, - "eval/tool_usage_rate": sum(1 for t in tool_counts if t > 0) / n if n else 0.0, - "eval/n_items": n, - } - - logger.info( - f"Eval complete โ€” correctness={eval_metrics['eval/mean_correctness']:.3f}, " - f"reward={eval_metrics['eval/mean_reward']:.3f}, " - 
f"tool_usage={eval_metrics['eval/tool_usage_rate']:.0%}" - ) - - await self.evaluate_log( - metrics=eval_metrics, - samples=samples, - start_time=start_time, - end_time=end_time, - ) - - # ------------------------------------------------------------------ - # 6. wandb_log โ€” custom metrics - # ------------------------------------------------------------------ - - async def wandb_log(self, wandb_metrics: Optional[Dict] = None) -> None: - """Log reward breakdown metrics to wandb.""" - if wandb_metrics is None: - wandb_metrics = {} - - if self._reward_buffer: - n = len(self._reward_buffer) - wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n - wandb_metrics["train/mean_correctness"] = sum(self._correctness_buffer) / n - wandb_metrics["train/mean_tool_usage"] = sum(self._tool_usage_buffer) / n - wandb_metrics["train/mean_efficiency"] = sum(self._efficiency_buffer) / n - wandb_metrics["train/mean_diversity"] = sum(self._diversity_buffer) / n - wandb_metrics["train/total_rollouts"] = n - - # Accuracy buckets - wandb_metrics["train/correct_rate"] = ( - sum(1 for c in self._correctness_buffer if c >= 0.7) / n - ) - wandb_metrics["train/tool_usage_rate"] = ( - sum(1 for t in self._tool_usage_buffer if t > 0) / n - ) - - # Clear buffers - self._reward_buffer.clear() - self._correctness_buffer.clear() - self._tool_usage_buffer.clear() - self._efficiency_buffer.clear() - self._diversity_buffer.clear() - - await super().wandb_log(wandb_metrics) - - # ------------------------------------------------------------------ - # Private helpers - # ------------------------------------------------------------------ - - async def _llm_judge( - self, - question: str, - expected: str, - model_answer: str, - ) -> float: - """ - Use the server's LLM to judge answer correctness. - Falls back to keyword heuristic if LLM call fails. 
- """ - if not model_answer or not model_answer.strip(): - return 0.0 - - judge_prompt = ( - "You are an impartial judge evaluating the quality of an AI research answer.\n\n" - f"Question: {question}\n\n" - f"Reference answer: {expected}\n\n" - f"Model answer: {model_answer}\n\n" - "Score the model answer on a scale from 0.0 to 1.0 where:\n" - " 1.0 = fully correct and complete\n" - " 0.7 = mostly correct with minor gaps\n" - " 0.4 = partially correct\n" - " 0.1 = mentions relevant topic but wrong or very incomplete\n" - " 0.0 = completely wrong or no answer\n\n" - "Consider: factual accuracy, completeness, and relevance.\n" - 'Respond with ONLY a JSON object: {"score": , "reason": ""}' - ) - - try: - response = await self.server.chat_completion( - messages=[{"role": "user", "content": judge_prompt}], - n=1, - max_tokens=150, - temperature=0.0, - split="eval", - ) - text = response.choices[0].message.content if response.choices else "" - parsed = self._parse_judge_json(text) - if parsed is not None: - return float(parsed) - except Exception as e: - logger.debug(f"LLM judge failed: {e}. 
Using heuristic.") - - return self._heuristic_score(expected, model_answer) - - @staticmethod - def _parse_judge_json(text: str) -> Optional[float]: - """Extract the score float from LLM judge JSON response.""" - try: - clean = re.sub(r"```(?:json)?|```", "", text).strip() - data = json.loads(clean) - score = float(data.get("score", -1)) - if 0.0 <= score <= 1.0: - return score - except Exception: - match = re.search(r'"score"\s*:\s*([0-9.]+)', text) - if match: - score = float(match.group(1)) - if 0.0 <= score <= 1.0: - return score - return None - - @staticmethod - def _heuristic_score(expected: str, model_answer: str) -> float: - """Lightweight keyword overlap score as fallback.""" - stopwords = { - "the", "a", "an", "is", "are", "was", "were", "of", "in", "on", - "at", "to", "for", "with", "and", "or", "but", "it", "its", - "this", "that", "as", "by", "from", "be", "has", "have", "had", - } - - def tokenize(text: str) -> set: - tokens = re.findall(r'\b\w+\b', text.lower()) - return {t for t in tokens if t not in stopwords and len(t) > 2} - - expected_tokens = tokenize(expected) - answer_tokens = tokenize(model_answer) - - if not expected_tokens: - return 0.5 - - overlap = len(expected_tokens & answer_tokens) - union = len(expected_tokens | answer_tokens) - - jaccard = overlap / union if union > 0 else 0.0 - recall = overlap / len(expected_tokens) - return min(1.0, 0.4 * jaccard + 0.6 * recall) - - @staticmethod - def _extract_domains(text: str) -> set: - """Extract unique domains from URLs cited in the response.""" - urls = re.findall(r'https?://[^\s\)>\]"\']+', text) - domains = set() - for url in urls: - try: - parsed = urlparse(url) - domain = parsed.netloc.lower().lstrip("www.") - if domain: - domains.add(domain) - except Exception: - pass - return domains - - -# --------------------------------------------------------------------------- -# Entry point -# --------------------------------------------------------------------------- - -if __name__ == 
"__main__": - WebResearchEnv.cli() diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py index ff4af85a89a8..1bd9bac806ff 100644 --- a/gateway/channel_directory.py +++ b/gateway/channel_directory.py @@ -73,6 +73,8 @@ async def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]: platforms["discord"] = _build_discord(adapter) elif platform == Platform.SLACK: platforms["slack"] = await _build_slack(adapter) + elif platform == Platform.TLON: + platforms["tlon"] = _build_tlon(adapter) except Exception as e: logger.warning("Channel directory: failed to build %s: %s", platform.value, e) @@ -208,6 +210,42 @@ async def _build_slack(adapter) -> List[Dict[str, Any]]: return channels +def _build_tlon(adapter) -> List[Dict[str, str]]: + """Enumerate monitored/discovered Tlon channels plus known DMs.""" + channels: List[Dict[str, str]] = [] + seen_ids: set[str] = set() + + monitored = sorted(getattr(adapter, "monitored_channels", set()) or []) + channel_to_group = getattr(adapter, "_channel_to_group", {}) or {} + group_names = getattr(adapter, "_group_names", {}) or {} + for nest in monitored: + parsed = None + try: + from gateway.platforms.tlon import _parse_channel_nest + parsed = _parse_channel_nest(nest) + except Exception: + parsed = None + + group_id = channel_to_group.get(nest, "") + group_name = group_names.get(group_id, group_id) + channel_name = parsed["name"] if parsed else nest + label = f"{group_name}/{channel_name}" if group_name else channel_name + channels.append({ + "id": nest, + "name": label, + "type": parsed["type"] if parsed else "channel", + "guild": group_name, + }) + seen_ids.add(nest) + + for entry in _build_from_sessions("tlon"): + if entry.get("id") not in seen_ids: + channels.append(entry) + seen_ids.add(entry.get("id")) + + return channels + + def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]: """Pull known channels/contacts from sessions.json origin data.""" sessions_path = get_hermes_home() / 
"sessions" / "sessions.json" diff --git a/gateway/config.py b/gateway/config.py index b3b87e24664a..a82444c8a5b8 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -127,6 +127,7 @@ class Platform(Enum): BLUEBUBBLES = "bluebubbles" QQBOT = "qqbot" YUANBAO = "yuanbao" + TLON = "tlon" @classmethod def _missing_(cls, value): """Accept unknown platform names only for known plugin adapters. @@ -432,6 +433,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "StreamingConfig": Platform.YUANBAO: lambda cfg: bool( cfg.extra.get("app_id") and cfg.extra.get("app_secret") ), + Platform.TLON: lambda cfg: bool( + cfg.extra.get("ship_url") and cfg.extra.get("ship_name") + ), Platform.DINGTALK: lambda cfg: bool( (cfg.extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID")) and (cfg.extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET")) @@ -941,6 +945,14 @@ def load_gateway_config() -> GatewayConfig: if isinstance(ntc, list): ntc = ",".join(str(v) for v in ntc) os.environ["DISCORD_NO_THREAD_CHANNELS"] = str(ntc) + # history_backfill: recover missed channel messages for shared sessions + # when require_mention is active. Fetches messages between bot turns + # and prepends them to the user message for context. + if "history_backfill" in discord_cfg and not os.getenv("DISCORD_HISTORY_BACKFILL"): + os.environ["DISCORD_HISTORY_BACKFILL"] = str(discord_cfg["history_backfill"]).lower() + hbl = discord_cfg.get("history_backfill_limit") + if hbl is not None and not os.getenv("DISCORD_HISTORY_BACKFILL_LIMIT"): + os.environ["DISCORD_HISTORY_BACKFILL_LIMIT"] = str(hbl) # allow_mentions: granular control over what the bot can ping. 
# Safe defaults (no @everyone/roles) are applied in the adapter; # these YAML keys only override when set and let users opt back @@ -1798,6 +1810,27 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if yuanbao_group_allow_from: extra["group_allow_from"] = yuanbao_group_allow_from + # Tlon (Urbit) + tlon_url = os.getenv("TLON_SHIP_URL") + tlon_name = os.getenv("TLON_SHIP_NAME") + tlon_code = os.getenv("TLON_SHIP_CODE") + if all([tlon_url, tlon_name, tlon_code]): + if Platform.TLON not in config.platforms: + config.platforms[Platform.TLON] = PlatformConfig() + config.platforms[Platform.TLON].enabled = True + config.platforms[Platform.TLON].extra.update({ + "ship_url": tlon_url, + "ship_name": tlon_name, + }) + tlon_home = os.getenv("TLON_HOME_CHANNEL") + if tlon_home: + config.platforms[Platform.TLON].home_channel = HomeChannel( + platform=Platform.TLON, + chat_id=tlon_home, + name=os.getenv("TLON_HOME_CHANNEL_NAME", "Home"), + thread_id=os.getenv("TLON_HOME_CHANNEL_THREAD_ID") or None, + ) + # Session settings idle_minutes = os.getenv("SESSION_IDLE_MINUTES") if idle_minutes: diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 8b53db3a99f3..809d6cd8a030 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -356,15 +356,34 @@ def put(self, response_id: str, data: Dict[str, Any]) -> None: # Evict oldest entries beyond max_size count = self._conn.execute("SELECT COUNT(*) FROM responses").fetchone()[0] if count > self._max_size: - self._conn.execute( - "DELETE FROM responses WHERE response_id IN " - "(SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?)", - (count - self._max_size,), - ) + # Collect IDs that will be evicted + evict_ids = [ + row[0] + for row in self._conn.execute( + "SELECT response_id FROM responses ORDER BY accessed_at ASC LIMIT ?", + (count - self._max_size,), + ).fetchall() + ] + if evict_ids: + placeholders = ",".join("?" 
for _ in evict_ids) + # Clear conversation mappings pointing to evicted responses + self._conn.execute( + f"DELETE FROM conversations WHERE response_id IN ({placeholders})", + evict_ids, + ) + # Delete evicted responses + self._conn.execute( + f"DELETE FROM responses WHERE response_id IN ({placeholders})", + evict_ids, + ) self._conn.commit() def delete(self, response_id: str) -> bool: """Remove a response from the store. Returns True if found and deleted.""" + # Clear conversation mappings pointing to this response + self._conn.execute( + "DELETE FROM conversations WHERE response_id = ?", (response_id,) + ) cursor = self._conn.execute( "DELETE FROM responses WHERE response_id = ?", (response_id,) ) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index ad9dac170ee7..d03bc282ed34 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -955,6 +955,12 @@ class MessageEvent: # Per-channel ephemeral system prompt (e.g. Discord channel_prompts). # Applied at API call time and never persisted to transcript history. channel_prompt: Optional[str] = None + + # Channel context recovered by history backfill (e.g. messages between + # bot turns that were missed due to require_mention). Kept separate + # from ``text`` so the sender-prefix logic in run.py can operate on the + # trigger message alone, then prepend this context afterward. + channel_context: Optional[str] = None # Internal flag โ€” set for synthetic events (e.g. background process # completion notifications) that must bypass user authorization checks. diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 4793df35c7ce..a3904630fa96 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -589,6 +589,10 @@ def __init__(self, config: PlatformConfig): # chunk only, default), "all" (reply-reference on every chunk). 
self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first' self._slash_commands: bool = self.config.extra.get("slash_commands", True) + # In-memory cache of the bot's last message ID per channel, used by + # history backfill to skip the full scan on hot paths. Falls back to + # scanning channel.history() on cache miss (cold start / restart). + self._last_self_message_id: Dict[str, str] = {} async def connect(self) -> bool: """Connect to Discord and start receiving events.""" @@ -1459,6 +1463,12 @@ async def send( raise message_ids.append(str(msg.id)) + # Track the last message we sent in this channel for history + # backfill โ€” avoids a full channel.history() scan on hot paths. + if message_ids: + _target_id = thread_id or chat_id + self._last_self_message_id[_target_id] = message_ids[-1] + return SendResult( success=True, message_id=message_ids[0] if message_ids else None, @@ -3596,6 +3606,134 @@ def _discord_thread_require_mention(self) -> bool: return bool(configured) return os.getenv("DISCORD_THREAD_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on") + def _discord_history_backfill(self) -> bool: + """Return whether history backfill is enabled for shared sessions.""" + configured = self.config.extra.get("history_backfill") + if configured is not None: + if isinstance(configured, str): + return configured.lower() not in ("false", "0", "no", "off") + return bool(configured) + return os.getenv("DISCORD_HISTORY_BACKFILL", "true").lower() in ("true", "1", "yes") + + def _discord_history_backfill_limit(self) -> int: + """Return the max number of messages to scan backwards for context. + + In practice the scan usually stops much earlier โ€” at the bot's own + last message in the channel (the natural partition point). This + limit is a safety cap for cold starts and long gaps where no prior + bot message exists in recent history. 
+ """ + configured = self.config.extra.get("history_backfill_limit") + if configured is not None: + try: + return int(configured) + except (ValueError, TypeError): + pass + raw = os.getenv("DISCORD_HISTORY_BACKFILL_LIMIT", "50") + try: + return int(raw) + except (ValueError, TypeError): + return 50 + + async def _fetch_channel_context( + self, + channel: Any, + before: "DiscordMessage", + ) -> str: + """Fetch recent channel messages for conversational context. + + Scans backwards from *before* and collects messages until it hits + a message sent by this bot (the natural partition point between + bot turns) or reaches ``history_backfill_limit``. + + Returns a formatted block like:: + + [Recent channel messages] + [Alice] some message + [Bob [bot]] another message + + Returns an empty string if no context is available. + """ + limit = self._discord_history_backfill_limit() + if limit <= 0: + return "" + + # Determine which bot messages to include in context + allow_bots_raw = os.getenv("DISCORD_ALLOW_BOTS", "none").lower().strip() + include_other_bots = allow_bots_raw != "none" + + # Use the in-memory cache to narrow the fetch window on hot paths. + # If we know our last message ID in this channel, pass it as `after` + # to avoid scanning the full limit. Falls back to scanning on cache + # miss (cold start / restart). + # Guard: only use the cache when it's chronologically before the + # trigger โ€” Discord snowflake IDs are monotonically increasing, so + # a simple int comparison suffices. + channel_id = str(getattr(channel, "id", "")) + _cached_id = self._last_self_message_id.get(channel_id) + _after_obj = None + try: + if _cached_id and int(_cached_id) < int(before.id): + _after_obj = discord.Object(id=int(_cached_id)) + except (ValueError, TypeError): + pass # Malformed cache entry โ€” fall back to cold-start scan + + try: + collected = [] + # IMPORTANT: pass oldest_first=False explicitly. 
discord.py 2.x + # silently flips the default to True when `after=` is supplied, + # which would select the *earliest* N messages after our last + # response instead of the *latest* N before the trigger. In + # high-traffic windows that returns stale tool traces and drops + # the actual final answer. See the regression test + # `test_fetch_channel_context_cache_uses_latest_window_when_after_set`. + async for msg in channel.history( + limit=limit, + before=before, + after=_after_obj, + oldest_first=False, + ): + # Stop at our own message โ€” this is the partition point. + # Everything before this is already in the session transcript. + # (Redundant when _after_obj is set, but needed for cold start.) + if msg.author == self._client.user: + break + + # Skip system messages (pins, joins, thread renames, etc.) + if msg.type not in (discord.MessageType.default, discord.MessageType.reply): + continue + + # Respect DISCORD_ALLOW_BOTS for other bots. + # For history context, "mentions" is treated as "all" โ€” we are + # deciding what context to show, not whether to respond. 
+ if getattr(msg.author, "bot", False) and not include_other_bots: + continue + + content = getattr(msg, "clean_content", msg.content) or "" + if not content and msg.attachments: + content = "(attachment)" + if not content: + continue + + name = msg.author.display_name + if getattr(msg.author, "bot", False): + name = f"{name} [bot]" + collected.append(f"[{name}] {content}") + + if not collected: + return "" + + # channel.history returns newest-first (oldest_first=False); reverse for chronological order + collected.reverse() + return "[Recent channel messages]\n" + "\n".join(collected) + + except discord.Forbidden: + logger.debug("[%s] Missing permissions to fetch channel history", self.name) + return "" + except Exception as e: + logger.warning("[%s] Failed to fetch channel history: %s", self.name, e) + return "" + def _thread_parent_channel(self, channel: Any) -> Any: """Return the parent text channel when invoked from a thread.""" return getattr(channel, "parent", None) or channel @@ -4504,9 +4642,50 @@ async def _handle_message(self, message: DiscordMessage) -> None: if pending_text_injection: event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection + # โ”€โ”€ History backfill โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + # When require_mention is active, the bot only processes messages + # that @mention it. Messages in the channel between bot turns are + # invisible to the session transcript. To recover that context, + # fetch recent channel history and prepend it to the user message. + # + # The fetch window is: everything after the bot's last message in + # the channel up to (but not including) the current trigger. On + # cold start (no prior bot message found), fetch the last N messages + # and stop at the first self-message encountered. 
+ # + # Threads naturally scope to thread-only history (channel.history() + # on a thread returns only that thread's messages). DMs are skipped + # because every DM message triggers the bot โ€” there's no mention gap + # to fill; the session transcript already has everything. + # + # Per-user sessions also benefit: Alice's session is missing the + # other-channel-participants' context, and her own messages from + # before she mentioned the bot. Backfill fills that gap. + # + # Messages that arrive while the bot is processing (between trigger + # and response) are not captured โ€” this is an accepted simplification + # to keep the partition rule clean. + _channel_context = None + _is_dm = isinstance(message.channel, discord.DMChannel) + if not _is_dm: + _needed_mention = ( + require_mention + and not is_free_channel + and not in_bot_thread + ) + _backfill_enabled = self._discord_history_backfill() + if _needed_mention and _backfill_enabled: + _backfill_text = await self._fetch_channel_context( + message.channel, before=message, + ) + if _backfill_text: + _channel_context = _backfill_text + # Defense-in-depth: prevent empty user messages from entering session - # (can happen when user sends @mention-only with no other text) - if not event_text or not event_text.strip(): + # (can happen when user sends @mention-only with no other text). + # When channel_context is present, a bare mention means "catch me up" + # โ€” the context IS the message, so skip the placeholder. 
+ if (not event_text or not event_text.strip()) and not _channel_context: event_text = "(The user sent a message with no text content)" _chan = message.channel @@ -4535,6 +4714,7 @@ async def _handle_message(self, message: DiscordMessage) -> None: timestamp=message.created_at, auto_skill=_skills, channel_prompt=_channel_prompt, + channel_context=_channel_context, ) # Track thread participation so the bot won't require @mention for diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index ca34ab4acac4..2116b569f968 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -2785,7 +2785,10 @@ async def _handle_slash_command(self, command: dict) -> None: from hermes_cli.commands import slack_subcommand_map subcommand_map = slack_subcommand_map() subcommand_map["compact"] = "/compress" - first_word = text.split()[0] if text else "" + # Guard against whitespace-only text where ``text`` is truthy but + # ``text.split()`` returns ``[]`` (e.g. user sends ``/hermes ``). + parts = text.split() if text else [] + first_word = parts[0] if parts else "" if first_word in subcommand_map: rest = text[len(first_word):].strip() text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word] diff --git a/gateway/platforms/tlon.py b/gateway/platforms/tlon.py new file mode 100644 index 000000000000..5c9661626fc2 --- /dev/null +++ b/gateway/platforms/tlon.py @@ -0,0 +1,2234 @@ +""" +Tlon (Urbit) platform adapter for Hermes Gateway. + +Connects to a Tlon ship via Eyre HTTP API: +- Authenticates with ship +code +- Subscribes to channel messages (channels /v2) and DMs (chat /v3) via SSE +- Sends messages back via pokes + +Requires: aiohttp (pip install aiohttp) + +Environment variables: + TLON_SHIP_URL - Ship URL (e.g. https://sampel-palnet.tlon.network) + TLON_SHIP_NAME - Ship name (e.g. ~sampel-palnet) + TLON_SHIP_CODE - Ship +code for authentication + TLON_CHANNELS - Comma-separated channel nests to monitor (e.g. 
chat/~host/channel) + TLON_DM_ALLOWLIST - Comma-separated ships allowed to DM (empty = all allowed) + TLON_HOME_CHANNEL - Default channel for cron delivery + TLON_ALLOWED_USERS - Comma-separated ships allowed to interact + TLON_ALLOW_ALL_USERS - Set to "true" to allow all users (default: false) + TLON_AUTO_DISCOVER - Set to "true" to auto-discover all group channels +""" + +import asyncio +import contextlib +import json +import logging +import os +import re +import time +import uuid +from datetime import datetime +from typing import Any, Dict, List, Optional, Set, Tuple + +logger = logging.getLogger(__name__) + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import ( + BasePlatformAdapter, + MessageEvent, + MessageType, + SendResult, +) +from gateway.platforms.tlon_approval import ( + PendingApproval, + create_pending_approval, + find_pending_approval, + format_approval_request, + format_blocked_list, + format_confirmation, + format_pending_list, + has_duplicate_pending, + prune_expired, +) +from gateway.platforms.tlon_discovery import ( + TlonDiscovery, + parse_groups_ui_init, + parse_legacy_groups, + pending_group_invites, +) +from gateway.platforms.tlon_media import ( + combined_message_type, + download_blob_attachments, + download_story_images, + format_blob_annotations, + parse_blob_data, +) +from gateway.platforms.tlon_settings import ( + SETTINGS_BUCKET, + SETTINGS_DESK, + TlonSettings, + apply_settings_update, + parse_settings_event, + parse_settings_response, +) + +# Maximum message length for Tlon (generous - Tlon handles long messages well) +MAX_MESSAGE_LENGTH = 10000 + + +def check_tlon_requirements() -> bool: + """Check if aiohttp is available for HTTP/SSE communication.""" + try: + import aiohttp + return True + except ImportError: + logger.warning("Tlon adapter requires aiohttp. 
Install with: pip install aiohttp") + return False + + +def _normalize_ship(ship: str) -> str: + """Normalize a ship name to include ~ prefix.""" + ship = ship.strip() + if ship and not ship.startswith("~"): + ship = "~" + ship + return ship + + +def _parse_channel_nest(nest: str) -> Optional[Dict[str, str]]: + """Parse a channel nest like 'chat/~host/channel-name'.""" + parts = nest.split("/", 2) + if len(parts) != 3: + return None + return { + "type": parts[0], # chat, heap, diary + "host": parts[1], # ~host-ship + "name": parts[2], # channel-name + } + + +def _extract_author_ship(author: Any) -> str: + """Extract a normalized ship from a Tlon author field.""" + if isinstance(author, dict): + for key in ("ship", "id", "patp"): + value = author.get(key) + if isinstance(value, str) and value: + return _normalize_ship(value) + return "" + if isinstance(author, str): + return _normalize_ship(author) + return "" + + +def _extract_message_text(content: Any) -> str: + """ + Extract plain text from Tlon's story/content format. + + Tlon messages use a 'story' format: an array of blocks. + Each block is either: + - {"inline": [...]} with text strings, links, mentions, etc. 
+ - {"block": {"image": {...}}} for images + - {"block": {"cite": {...}}} for quotes + """ + if not content: + return "" + + if isinstance(content, str): + return content + + if isinstance(content, list): + parts = [] + for block in content: + if isinstance(block, str): + parts.append(block) + elif isinstance(block, dict): + # Inline block: {"inline": [...]} + if "inline" in block: + text = _extract_inline_text(block["inline"]) + if text: + parts.append(text) + # Block types + elif "block" in block: + b = block["block"] + if isinstance(b, dict): + if "image" in b: + img = b["image"] + alt = img.get("alt", "") + src = img.get("src", "") + parts.append(f"[image: {alt or src}]") + elif "cite" in b: + parts.append("[quoted message]") + elif "code" in b: + code = b["code"] + lang = code.get("lang", "") + body = code.get("code", "") + parts.append(f"```{lang}\n{body}\n```") + elif "header" in b: + parts.append(_extract_inline_text(b["header"].get("content", []))) + elif "rule" in b: + parts.append("---") + return "\n".join(p for p in parts if p).strip() + + return str(content) + + +def _extract_inline_text(inlines: Any) -> str: + """Recursively extract text from inline content.""" + if isinstance(inlines, str): + return inlines + if isinstance(inlines, list): + parts = [] + for item in inlines: + if isinstance(item, str): + parts.append(item) + elif isinstance(item, dict): + if "ship" in item: + parts.append(_normalize_ship(item["ship"])) + elif "link" in item: + parts.append(item["link"].get("content", item["link"].get("href", ""))) + elif "bold" in item: + parts.append(_extract_inline_text(item["bold"])) + elif "italics" in item: + parts.append(_extract_inline_text(item["italics"])) + elif "strike" in item: + parts.append(_extract_inline_text(item["strike"])) + elif "blockquote" in item: + parts.append(_extract_inline_text(item["blockquote"])) + elif "inline-code" in item: + parts.append(item["inline-code"]) + elif "code" in item: + parts.append(item["code"]) + elif 
"break" in item: + parts.append("\n") + elif "tag" in item: + parts.append(f"#{item['tag']}") + return "".join(parts) + return "" + + +def _merge_adjacent_strings(inlines: List[Any]) -> List[Any]: + """Merge neighboring string inlines after markdown parsing.""" + merged: List[Any] = [] + for inline in inlines: + if ( + isinstance(inline, str) + and merged + and isinstance(merged[-1], str) + ): + merged[-1] += inline + else: + merged.append(inline) + return merged + + +def _parse_inline_markdown(text: str) -> List[Any]: + """Parse the subset of markdown that Tlon story inlines support.""" + result: List[Any] = [] + remaining = text + + while remaining: + image_match = re.match(r'^!\[([^\]]*)\]\(([^)]+)\)', remaining) + if image_match: + result.append({ + "__image": { + "src": image_match.group(2), + "alt": image_match.group(1), + } + }) + remaining = remaining[len(image_match.group(0)):] + continue + + ship_match = re.match(r'^(~[a-z][a-z0-9-]*)', remaining) + if ship_match: + result.append({"ship": ship_match.group(1)}) + remaining = remaining[len(ship_match.group(0)):] + continue + + bold_match = re.match(r'^\*\*(.+?)\*\*|^__(.+?)__', remaining) + if bold_match: + content = bold_match.group(1) or bold_match.group(2) + result.append({"bold": _parse_inline_markdown(content)}) + remaining = remaining[len(bold_match.group(0)):] + continue + + italics_match = re.match(r'^\*([^*]+?)\*|^_([^_]+?)_(?![a-zA-Z0-9])', remaining) + if italics_match: + content = italics_match.group(1) or italics_match.group(2) + result.append({"italics": _parse_inline_markdown(content)}) + remaining = remaining[len(italics_match.group(0)):] + continue + + strike_match = re.match(r'^~~(.+?)~~', remaining) + if strike_match: + result.append({"strike": _parse_inline_markdown(strike_match.group(1))}) + remaining = remaining[len(strike_match.group(0)):] + continue + + code_match = re.match(r'^`([^`]+)`', remaining) + if code_match: + result.append({"inline-code": code_match.group(1)}) + remaining = 
remaining[len(code_match.group(0)):] + continue + + link_match = re.match(r'^\[([^\]]+)\]\(([^)]+)\)', remaining) + if link_match: + result.append({ + "link": { + "href": link_match.group(2), + "content": link_match.group(1), + } + }) + remaining = remaining[len(link_match.group(0)):] + continue + + url_match = re.match(r'^(https?://[^\s<>"\]]+)', remaining) + if url_match: + url = url_match.group(1) + result.append({"link": {"href": url, "content": url}}) + remaining = remaining[len(url):] + continue + + special_indices = [ + idx for idx in ( + remaining.find("!["), + remaining.find("**"), + remaining.find("__"), + remaining.find("~~"), + remaining.find("`"), + remaining.find("["), + remaining.find("~"), + remaining.find("\n"), + remaining.find("*"), + remaining.find("_"), + ) + if idx >= 0 + ] + url_index = re.search(r'https?://', remaining) + if url_index: + special_indices.append(url_index.start()) + + next_token_index = min(special_indices) if special_indices else -1 + if next_token_index > 0: + result.append(remaining[:next_token_index]) + remaining = remaining[next_token_index:] + continue + + result.append(remaining[0]) + remaining = remaining[1:] + + return _merge_adjacent_strings(result) + + +def _replace_newlines_with_breaks(inlines: List[Any]) -> List[Any]: + """Turn literal newlines inside string inlines into Tlon break elements.""" + with_breaks: List[Any] = [] + for inline in inlines: + if isinstance(inline, str) and "\n" in inline: + pieces = inline.split("\n") + for index, piece in enumerate(pieces): + if piece: + with_breaks.append(piece) + if index < len(pieces) - 1: + with_breaks.append({"break": None}) + else: + with_breaks.append(inline) + return with_breaks + + +def _split_image_markers(inlines: List[Any]) -> Tuple[List[Any], List[Dict[str, Any]]]: + """Hoist markdown image markers out of inline content into story blocks.""" + clean: List[Any] = [] + images: List[Dict[str, Any]] = [] + for inline in inlines: + if isinstance(inline, dict) and 
"__image" in inline: + image = inline["__image"] + images.append({ + "block": { + "image": { + "src": image.get("src", ""), + "alt": image.get("alt", ""), + "width": 0, + "height": 0, + } + } + }) + else: + clean.append(inline) + return clean, images + + +def _text_to_story(text: str) -> list: + """ + Convert plain text/markdown to Tlon's story format. + + Returns a list of story blocks suitable for use as post content. + """ + story: List[Dict[str, Any]] = [] + lines = text.split("\n") + index = 0 + + while index < len(lines): + line = lines[index] + + if line.startswith("```"): + lang = line[3:].strip() or "plaintext" + code_lines = [] + index += 1 + while index < len(lines) and not lines[index].startswith("```"): + code_lines.append(lines[index]) + index += 1 + if index < len(lines): + index += 1 + story.append({ + "block": { + "code": { + "code": "\n".join(code_lines), + "lang": lang, + } + } + }) + continue + + header_match = re.match(r'^(#{1,6})\s+(.+)$', line) + if header_match: + level = len(header_match.group(1)) + story.append({ + "block": { + "header": { + "tag": f"h{level}", + "content": _parse_inline_markdown(header_match.group(2)), + } + } + }) + index += 1 + continue + + if re.match(r'^(-{3,}|\*{3,})$', line.strip()): + story.append({"block": {"rule": None}}) + index += 1 + continue + + if line.startswith("> "): + quote_lines = [] + while index < len(lines) and lines[index].startswith("> "): + quote_lines.append(lines[index][2:]) + index += 1 + story.append({ + "inline": [{ + "blockquote": _parse_inline_markdown("\n".join(quote_lines)) + }] + }) + continue + + if not line.strip(): + index += 1 + continue + + paragraph_lines = [] + while ( + index < len(lines) + and lines[index].strip() + and not lines[index].startswith("#") + and not lines[index].startswith("```") + and not lines[index].startswith("> ") + and not re.match(r'^(-{3,}|\*{3,})$', lines[index].strip()) + ): + paragraph_lines.append(lines[index]) + index += 1 + + inlines = 
_parse_inline_markdown("\n".join(paragraph_lines)) + inlines = _replace_newlines_with_breaks(inlines) + clean_inlines, image_blocks = _split_image_markers(inlines) + + if clean_inlines: + story.append({"inline": clean_inlines}) + story.extend(image_blocks) + + return story or [{"inline": [""]}] + + +def _format_ud(num: int) -> str: + """ + Format a number as Urbit @ud (dot-separated groups of 3 digits). + + Example: 170141184505128523237 โ†’ "170.141.184.505.128.523.237" + """ + s = str(num) + # Insert dots every 3 digits from the right + groups = [] + while len(s) > 3: + groups.append(s[-3:]) + s = s[:-3] + groups.append(s) + return ".".join(reversed(groups)) + + +def _normalize_post_id(message_id: Any) -> str: + """Normalize post IDs for equality across dotted and raw @ud forms.""" + return str(message_id or "").replace(".", "") + + +# Urbit @da epoch offset and second size (from @urbit/aura) +_DA_UNIX_EPOCH = 170141184475152167957503069145530368000 +_DA_SECOND = 18446744073709551616 + + +def _da_from_unix(unix_ms: int) -> str: + """ + Convert Unix timestamp (ms) to Urbit @da bigint, returned as @ud string. + + Replicates: formatUd(da.fromUnix(sentAt).toString()) from @urbit/aura. + """ + time_since_epoch = unix_ms * _DA_SECOND // 1000 + da_value = _DA_UNIX_EPOCH + time_since_epoch + return _format_ud(da_value) + + +class TlonSSEClient: + """ + Manages an Eyre SSE channel for subscribing to Tlon events. 
+ + Handles: + - Authentication via POST /~/login + - Channel creation via PUT /~/channel/{id} + - SSE event streaming via GET /~/channel/{id} + - Event acknowledgement + - Reconnection with exponential backoff + """ + + def __init__( + self, + url: str, + code: str, + ship: str, + *, + auto_reconnect: bool = True, + max_reconnect_attempts: int = 10, + reconnect_delay: float = 1.0, + max_reconnect_delay: float = 30.0, + ): + self.url = url.rstrip("/") + self.code = code + self.ship = _normalize_ship(ship) + self.auto_reconnect = auto_reconnect + self.max_reconnect_attempts = max_reconnect_attempts + self.reconnect_delay = reconnect_delay + self.max_reconnect_delay = max_reconnect_delay + + self.cookie: Optional[str] = None + self.channel_id: Optional[str] = None + self.channel_url: Optional[str] = None + self._session: Optional[Any] = None # aiohttp.ClientSession + self._sse_task: Optional[asyncio.Task] = None + self._aborted = False + self._connected = False + self._reconnect_attempts = 0 + self._action_counter = 0 + + # Subscription tracking + self._subscriptions: List[Dict[str, Any]] = [] + self._event_handlers: Dict[int, Dict[str, Any]] = {} + + # Event ack tracking + self._last_heard_event_id = -1 + self._last_acked_event_id = -1 + self._ack_threshold = 20 + + async def authenticate(self) -> str: + """Authenticate with the ship and return the cookie.""" + import aiohttp + + if not self._session: + self._session = aiohttp.ClientSession() + + async with self._session.post( + f"{self.url}/~/login", + data={"password": self.code}, + allow_redirects=False, + timeout=aiohttp.ClientTimeout(total=15), + ) as resp: + if resp.status not in (200, 204, 302, 303, 307): + raise ConnectionError(f"Auth failed: HTTP {resp.status}") + cookie = resp.headers.get("set-cookie", "") + if not cookie: + # Try from cookies jar + for c in self._session.cookie_jar: + if c.key.startswith("urbauth"): + cookie = f"{c.key}={c.value}" + break + if not cookie: + raise ConnectionError("No auth 
cookie received") + self.cookie = cookie + logger.info("[tlon] Authenticated as %s", self.ship) + return cookie + + async def _new_channel_id(self) -> str: + """Generate a new unique channel ID.""" + ts = int(time.time()) + uid = uuid.uuid4().hex[:8] + return f"{ts}-{uid}" + + def _next_action_id(self) -> int: + """Get the next action ID for channel operations.""" + self._action_counter += 1 + return self._action_counter + + async def subscribe( + self, + app: str, + path: str, + on_event: Optional[Any] = None, + on_error: Optional[Any] = None, + on_quit: Optional[Any] = None, + ) -> int: + """ + Subscribe to a Gall agent path. + + Returns the subscription ID. + """ + sub_id = self._next_action_id() + sub = { + "id": sub_id, + "action": "subscribe", + "ship": self.ship.lstrip("~"), + "app": app, + "path": path, + } + self._subscriptions.append(sub) + self._event_handlers[sub_id] = { + "event": on_event, + "err": on_error, + "quit": on_quit, + } + + # If already connected, send subscription immediately + if self._connected: + await self._send_actions([sub]) + + return sub_id + + async def _send_actions(self, actions: List[Dict[str, Any]]) -> None: + """Send actions to the Eyre channel.""" + import aiohttp + + action_types = [a.get("action", "?") for a in actions] + logger.debug("[tlon] Sending %d action(s) to %s: %s", + len(actions), self.channel_url, action_types) + + # Let the cookie jar handle auth (set by authenticate()) + async with self._session.put( + self.channel_url, + json=actions, + headers={"Content-Type": "application/json"}, + timeout=aiohttp.ClientTimeout(total=30), + ) as resp: + if resp.status not in (200, 204): + text = await resp.text() + logger.error("[tlon] Channel action failed: HTTP %d - %s", + resp.status, text[:200]) + raise ConnectionError( + f"Channel action failed: HTTP {resp.status} - {text[:200]}" + ) + logger.debug("[tlon] Action(s) sent OK: HTTP %d", resp.status) + + async def connect(self) -> None: + """ + Create the Eyre channel 
with initial subscriptions and start + the SSE event stream. + """ + self.channel_id = await self._new_channel_id() + self.channel_url = f"{self.url}/~/channel/{self.channel_id}" + + # Create channel with all pending subscriptions + if self._subscriptions: + await self._send_actions(self._subscriptions) + + # Start SSE stream + await self._open_stream() + self._connected = True + self._reconnect_attempts = 0 + logger.info("[tlon] SSE connected on channel %s", self.channel_id) + + async def _open_stream(self) -> None: + """Open the SSE GET stream.""" + # Let cookie jar handle auth + headers = {"Accept": "text/event-stream"} + self._sse_task = asyncio.create_task(self._stream_loop(headers)) + + async def _stream_loop(self, headers: Dict[str, str]) -> None: + """Read the SSE stream and dispatch events.""" + import aiohttp + + try: + async with self._session.get( + self.channel_url, + headers=headers, + timeout=aiohttp.ClientTimeout( + total=None, # No total timeout for SSE + sock_read=None, # No read timeout + connect=60, + ), + ) as resp: + if resp.status != 200: + raise ConnectionError(f"SSE stream failed: HTTP {resp.status}") + + buffer = "" + async for chunk in resp.content.iter_any(): + if self._aborted: + break + buffer += chunk.decode("utf-8", errors="replace") + + while "\n\n" in buffer: + event_data, buffer = buffer.split("\n\n", 1) + await self._process_event(event_data) + + except asyncio.CancelledError: + return + except Exception as e: + if not self._aborted: + logger.error("[tlon] SSE stream error: %s", e) + self._connected = False + if self.auto_reconnect: + await self._attempt_reconnect() + + async def _process_event(self, event_data: str) -> None: + """Parse and dispatch a single SSE event.""" + lines = event_data.split("\n") + data = None + event_id = None + + for line in lines: + if line.startswith("id: "): + try: + event_id = int(line[4:]) + except ValueError: + pass + elif line.startswith("data: "): + data = line[6:] + + if not data: + return + + 
logger.debug("[tlon] SSE event id=%s, data=%s", event_id, data[:120]) + + # Track and ack events + if event_id is not None and event_id > self._last_heard_event_id: + self._last_heard_event_id = event_id + if event_id - self._last_acked_event_id > self._ack_threshold: + asyncio.create_task(self._ack(event_id)) + + try: + parsed = json.loads(data) + except json.JSONDecodeError: + logger.debug("[tlon] Non-JSON SSE data: %s", data[:100]) + return + + # Handle quit events (agent kicked us) + if parsed.get("response") == "quit": + sub_id = parsed.get("id") + if sub_id and sub_id in self._event_handlers: + handler = self._event_handlers[sub_id] + if handler.get("quit"): + handler["quit"]() + # Auto-resubscribe + asyncio.create_task(self._resubscribe(sub_id)) + return + + if parsed.get("response") == "err": + sub_id = parsed.get("id") + if sub_id and sub_id in self._event_handlers: + handler = self._event_handlers[sub_id] + if handler.get("err"): + handler["err"](parsed.get("json") or parsed) + return + + # Dispatch to handlers + sub_id = parsed.get("id") + event_json = parsed.get("json") + resp_type = parsed.get("response", "") + + logger.debug("[tlon] Dispatching: sub_id=%s, response=%s, has_json=%s, handlers=%s", + sub_id, resp_type, event_json is not None, list(self._event_handlers.keys())) + + if sub_id and sub_id in self._event_handlers: + handler = self._event_handlers[sub_id] + if handler.get("event") and event_json is not None: + try: + await handler["event"](event_json) + except Exception as e: + logger.error("[tlon] Event handler error: %s", e) + elif event_json is not None: + logger.debug("[tlon] Ignoring event with unknown sub_id=%s", sub_id) + + async def _ack(self, event_id: int) -> None: + """Acknowledge events up to event_id.""" + self._last_acked_event_id = event_id + try: + await self._send_actions([{ + "id": self._next_action_id(), + "action": "ack", + "event-id": event_id, + }]) + except Exception as e: + logger.debug("[tlon] Ack failed: %s", e) + + 
async def _resubscribe(self, old_sub_id: int) -> None: + """Re-subscribe after a quit event.""" + old_sub = None + for sub in self._subscriptions: + if sub["id"] == old_sub_id: + old_sub = sub + break + if not old_sub: + return + + handlers = self._event_handlers.get(old_sub_id) + if not handlers: + return + + for attempt in range(5): + delay = min(2.0 * (2 ** attempt), 30.0) + logger.info("[tlon] Resubscribing to %s%s in %.0fs...", + old_sub["app"], old_sub["path"], delay) + await asyncio.sleep(delay) + + if self._aborted or not self._connected: + return + + try: + new_id = self._next_action_id() + new_sub = {**old_sub, "id": new_id} + self._subscriptions.append(new_sub) + self._event_handlers[new_id] = handlers + del self._event_handlers[old_sub_id] + await self._send_actions([new_sub]) + logger.info("[tlon] Resubscribed to %s%s", old_sub["app"], old_sub["path"]) + return + except Exception as e: + logger.error("[tlon] Resubscribe failed: %s", e) + + async def _attempt_reconnect(self) -> None: + """Reconnect with exponential backoff.""" + if self._aborted: + return + + while self._reconnect_attempts < self.max_reconnect_attempts: + self._reconnect_attempts += 1 + delay = min( + self.reconnect_delay * (2 ** (self._reconnect_attempts - 1)), + self.max_reconnect_delay, + ) + logger.info("[tlon] Reconnecting in %.1fs (attempt %d/%d)...", + delay, self._reconnect_attempts, self.max_reconnect_attempts) + await asyncio.sleep(delay) + + if self._aborted: + return + + try: + # Re-authenticate + await self.authenticate() + # New channel + self.channel_id = await self._new_channel_id() + self.channel_url = f"{self.url}/~/channel/{self.channel_id}" + # Reconnect + await self.connect() + logger.info("[tlon] Reconnected successfully!") + return + except Exception as e: + logger.error("[tlon] Reconnect failed: %s", e) + + # Reset and keep trying + logger.warning("[tlon] Max reconnect attempts reached, resetting counter...") + await asyncio.sleep(10) + self._reconnect_attempts = 
0 + await self._attempt_reconnect() + + async def poke(self, app: str, mark: str, json_data: Any) -> None: + """ + Send a poke via a one-shot Eyre channel. + + Uses a separate channel from the SSE stream (matching openclaw-tlon's + http-poke.ts pattern). Sending pokes on the SSE channel can cause + them to be silently dropped. + """ + import aiohttp + + poke_channel_id = await self._new_channel_id() + poke_url = f"{self.url}/~/channel/{poke_channel_id}" + + action = { + "id": int(time.time() * 1000), + "action": "poke", + "ship": self.ship.lstrip("~"), + "app": app, + "mark": mark, + "json": json_data, + } + + logger.info("[tlon] One-shot poke to %s mark=%s json=%s", + poke_url, mark, json.dumps(json_data)[:300]) + async with self._session.put( + poke_url, + json=[action], + headers={"Content-Type": "application/json"}, + timeout=aiohttp.ClientTimeout(total=30), + ) as resp: + if resp.status not in (200, 204): + text = await resp.text() + logger.error("[tlon] Poke failed: HTTP %d - %s", resp.status, text[:200]) + raise ConnectionError(f"Poke failed: HTTP {resp.status}") + logger.debug("[tlon] Poke PUT OK: HTTP %d", resp.status) + + # Read SSE ack/nack from the one-shot channel when Eyre provides one. 
+ try: + ack_url = f"{poke_url}?msg=0" + async with self._session.get( + ack_url, + headers={"Accept": "text/event-stream"}, + timeout=aiohttp.ClientTimeout(total=5), + ) as ack_resp: + async for line in ack_resp.content: + decoded = line.decode("utf-8", errors="replace").strip() + if not decoded: + continue + if decoded.startswith("data:"): + data_str = decoded[5:].strip() + try: + data = json.loads(data_str) + if data.get("ok") is not None: + logger.info("[tlon] Poke ACK: %s", data) + break + elif data.get("err"): + logger.error("[tlon] Poke NACK: %s", data) + raise ConnectionError(f"Poke rejected: {data.get('err')}") + elif "ok" in str(data) or "err" in str(data): + logger.info("[tlon] Poke response: %s", data) + break + except json.JSONDecodeError: + if "ok" in data_str or "err" in data_str: + logger.info("[tlon] Poke SSE raw: %s", data_str[:200]) + break + except asyncio.TimeoutError: + logger.warning("[tlon] Poke ack read timed out (5s)") + except ConnectionError: + raise + except Exception as e: + logger.warning("[tlon] Poke ack read error: %s", e) + finally: + with contextlib.suppress(Exception): + await self._session.delete( + poke_url, + timeout=aiohttp.ClientTimeout(total=5), + ) + + async def scry(self, path: str) -> Any: + """Scry a path and return the JSON response.""" + import aiohttp + + if path.startswith("/~/scry/"): + path = path[len("/~/scry"):] + elif path.startswith("~/scry/"): + path = "/" + path[len("~/scry/"):] + + full_path = path if path.endswith(".json") else f"{path}.json" + # Use /~/scry prefix for Eyre scry endpoint + scry_url = f"{self.url}/~/scry{full_path}" + async with self._session.get( + scry_url, + timeout=aiohttp.ClientTimeout(total=30), + ) as resp: + if resp.status != 200: + text = await resp.text() + raise Exception(f"Scry failed: HTTP {resp.status} - {text[:200]}") + return await resp.json() + + async def close(self) -> None: + """Close the SSE connection and clean up.""" + self._aborted = True + self._connected = False + 
+ if self._sse_task: + self._sse_task.cancel() + try: + await self._sse_task + except asyncio.CancelledError: + pass + + # Try to clean up the Eyre channel + if self._session and self.channel_url: + try: + # Unsubscribe + unsubs = [ + {"id": sub["id"], "action": "unsubscribe", "subscription": sub["id"]} + for sub in self._subscriptions + ] + if unsubs: + await self._send_actions(unsubs) + except Exception: + pass + + try: + await self._session.delete( + self.channel_url, + timeout=aiohttp.ClientTimeout(total=5), + ) + except Exception: + pass + + if self._session: + await self._session.close() + self._session = None + + +class TlonAdapter(BasePlatformAdapter): + """ + Hermes Gateway adapter for Tlon (Urbit). + + Connects to a Tlon ship and monitors channels + DMs for messages, + dispatching them to the Hermes agent session store. + """ + + MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH + + def __init__(self, config: PlatformConfig): + super().__init__(config, Platform.TLON) + + # Read config from env vars (following Hermes convention) + self.ship_url = os.getenv("TLON_SHIP_URL", "").rstrip("/") + self.ship_name = _normalize_ship(os.getenv("TLON_SHIP_NAME", "")) + self.ship_code = os.getenv("TLON_SHIP_CODE", "") + + # Channels to monitor + channels_str = os.getenv("TLON_CHANNELS", "") + self.monitored_channels: Set[str] = set( + ch.strip() for ch in channels_str.split(",") if ch.strip() + ) + + # DM allowlist + dm_str = os.getenv("TLON_DM_ALLOWLIST", "") + self.dm_allowlist: Set[str] = set( + _normalize_ship(s) for s in dm_str.split(",") if s.strip() + ) + + # User allowlist (for authorization) + users_str = os.getenv("TLON_ALLOWED_USERS", "") + self.allowed_users: Set[str] = set( + _normalize_ship(s) for s in users_str.split(",") if s.strip() + ) + self.allow_all = os.getenv("TLON_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") + + # Auto-discover channels + self.auto_discover = os.getenv("TLON_AUTO_DISCOVER", "").lower() in ("true", "1", "yes") + 
self.auto_accept_dm_invites = os.getenv( + "TLON_AUTO_ACCEPT_DM_INVITES", + "", + ).lower() in ("true", "1", "yes") + self.auto_accept_group_invites = os.getenv( + "TLON_AUTO_ACCEPT_GROUP_INVITES", + "", + ).lower() in ("true", "1", "yes") + + self.owner_ship = _normalize_ship(os.getenv("TLON_OWNER_SHIP", "")) + default_auth = os.getenv("TLON_DEFAULT_AUTHORIZED_SHIPS", "") + self.default_authorized_ships: Set[str] = set( + _normalize_ship(s) for s in default_auth.split(",") if s.strip() + ) + self.blocked_ships: Set[str] = set( + _normalize_ship(s) + for s in os.getenv("TLON_BLOCKED_SHIPS", "").split(",") + if s.strip() + ) + self.channel_rules: Dict[str, Dict[str, Any]] = self._load_channel_rules_from_env() + self.pending_approvals: List[PendingApproval] = [] + + # SSE client + self._sse: Optional[TlonSSEClient] = None + self._settings = TlonSettings() + self._settings_loaded = False + + # Dedup tracker + self._processed_ids: Set[str] = set() + self._processed_dm_invites: Set[str] = set() + self._max_processed = 2000 + + # Bot nickname cache + self._bot_nickname: Optional[str] = None + + # Send dedup: prevent identical messages within a short window + self._recent_sends: Dict[str, float] = {} # hash -> timestamp + + # Thread-safe dedup lock for message processing + self._process_lock = asyncio.Lock() + + self._channel_to_group: Dict[str, str] = {} + self._group_names: Dict[str, str] = {} + self._participated_threads: Set[Tuple[str, str]] = set() + + def _load_channel_rules_from_env(self) -> Dict[str, Dict[str, Any]]: + raw = os.getenv("TLON_CHANNEL_RULES", "") + if not raw: + return {} + try: + parsed = json.loads(raw) + except ValueError: + logger.warning("[tlon] Ignoring invalid TLON_CHANNEL_RULES JSON") + return {} + if not isinstance(parsed, dict): + return {} + rules: Dict[str, Dict[str, Any]] = {} + for nest, rule in parsed.items(): + if isinstance(nest, str) and isinstance(rule, dict): + rules[nest] = rule + return rules + + async def _load_settings(self) 
-> None: + if not self._sse: + return + try: + raw = await self._sse.scry("/settings/all.json") + except Exception as e: + self._settings_loaded = True + logger.debug("[tlon] Settings load skipped: %s", e) + return + self._settings = parse_settings_response(raw) + self._settings_loaded = True + self._apply_settings(self._settings) + logger.info("[tlon] Settings loaded from %s/%s", SETTINGS_DESK, SETTINGS_BUCKET) + + async def _handle_settings_event(self, event: Any) -> None: + update = parse_settings_event(event) + if not update: + return + key, value = update + self._settings = apply_settings_update(self._settings, key, value) + self._apply_settings(self._settings) + logger.info("[tlon] Settings updated: %s", key) + + def _apply_settings(self, settings: TlonSettings) -> None: + if settings.group_channels is not None: + self.monitored_channels.update(ch for ch in settings.group_channels if ch) + if settings.dm_allowlist is not None: + self.dm_allowlist = {_normalize_ship(s) for s in settings.dm_allowlist if s} + if settings.auto_discover is not None: + self.auto_discover = settings.auto_discover + if settings.auto_accept_dm_invites is not None: + self.auto_accept_dm_invites = settings.auto_accept_dm_invites + if settings.auto_accept_group_invites is not None: + self.auto_accept_group_invites = settings.auto_accept_group_invites + if settings.channel_rules: + self.channel_rules = settings.channel_rules + if settings.default_authorized_ships is not None: + self.default_authorized_ships = { + _normalize_ship(s) for s in settings.default_authorized_ships if s + } + if settings.owner_ship: + self.owner_ship = _normalize_ship(settings.owner_ship) + if settings.pending_approvals is not None: + self.pending_approvals = [ + PendingApproval.from_dict(item) + for item in settings.pending_approvals + if isinstance(item, dict) + ] + + async def _put_settings_entry(self, key: str, value: Any) -> None: + if not self._sse: + return + try: + await self._sse.poke( + app="settings", 
+ mark="settings-event", + json_data={ + "put-entry": { + "desk": SETTINGS_DESK, + "bucket-key": SETTINGS_BUCKET, + "entry-key": key, + "value": value, + } + }, + ) + except Exception as e: + logger.debug("[tlon] Failed to write settings entry %s: %s", key, e) + + async def connect(self) -> bool: + """Connect to the Tlon ship and start listening.""" + if not self.ship_url or not self.ship_name or not self.ship_code: + logger.error("[tlon] Missing config: TLON_SHIP_URL, TLON_SHIP_NAME, TLON_SHIP_CODE") + return False + + try: + self._sse = TlonSSEClient( + url=self.ship_url, + code=self.ship_code, + ship=self.ship_name, + ) + + # Authenticate + await self._sse.authenticate() + + # Fetch bot profile for nickname + try: + profile = await self._sse.scry("/contacts/v1/self.json") + if profile and isinstance(profile, dict): + self._bot_nickname = profile.get("nickname", {}).get("value") + if self._bot_nickname: + logger.info("[tlon] Bot nickname: %s", self._bot_nickname) + except Exception as e: + logger.debug("[tlon] Could not fetch self profile: %s", e) + + await self._load_settings() + + # Auto-discover channels from groups + if self.auto_discover: + try: + discovered = await self._discover_channels() + self.monitored_channels.update(discovered) + logger.info("[tlon] Auto-discovered %d channel(s)", len(discovered)) + except Exception as e: + logger.warning("[tlon] Auto-discovery failed: %s", e) + + if self.monitored_channels: + logger.info("[tlon] Monitoring %d channel(s): %s", + len(self.monitored_channels), + ", ".join(sorted(self.monitored_channels))) + else: + logger.info("[tlon] No group channels configured (DMs only)") + + # Subscribe to channels firehose (/v2) for group messages + await self._sse.subscribe( + app="channels", + path="/v2", + on_event=self._handle_channel_event, + on_error=lambda e: logger.error("[tlon] Channels error: %s", e), + on_quit=lambda: logger.info("[tlon] Channels quit received"), + ) + + # Subscribe to chat firehose (/v3) for DMs + 
await self._sse.subscribe( + app="chat", + path="/v3", + on_event=self._handle_dm_event, + on_error=lambda e: logger.error("[tlon] Chat error: %s", e), + on_quit=lambda: logger.info("[tlon] Chat quit received"), + ) + + # Subscribe to OpenClaw-compatible settings-store updates. This is + # intentionally best-effort; the adapter still runs from env config + # when %settings is unavailable on the ship. + await self._sse.subscribe( + app="settings", + path=f"/desk/{SETTINGS_DESK}", + on_event=self._handle_settings_event, + on_error=lambda e: logger.debug("[tlon] Settings error: %s", e), + on_quit=lambda: logger.info("[tlon] Settings subscription quit received"), + ) + + # Connect and start streaming + await self._sse.connect() + + self._running = True + logger.info("[tlon] Connected and listening!") + return True + + except Exception as e: + logger.error("[tlon] Connection failed: %s", e) + return False + + async def disconnect(self) -> None: + """Disconnect from the Tlon ship.""" + self._running = False + if self._sse: + await self._sse.close() + self._sse = None + logger.info("[tlon] Disconnected") + + async def handle_message(self, event) -> None: + """Override base adapter's handle_message to bypass the pending-message + replay system which causes echo loops on Tlon. + + The base adapter queues messages that arrive while an agent is running + and replays them after the response is sent โ€” but those replayed + messages re-trigger the agent, creating duplicate responses. + + Instead we process each message directly in its own background task + with no replay/interrupt machinery.""" + if not self._message_handler: + return + + async def _run(): + try: + response = await self._message_handler(event) + if response: + _, response = self.extract_media(response) + images, text_content = self.extract_images(response) + if text_content: + # For thread replies, pass reply_to so the response + # goes into the thread (not as a new top-level post). 
+ # reply_to_message_id is set by inbound handlers when + # the message came from a thread. + reply_to = getattr(event, "reply_to_message_id", None) + logger.info("[tlon] Sending response (%d chars) to %s reply_to=%s", + len(text_content), event.source.chat_id, reply_to) + await self.send( + chat_id=event.source.chat_id, + content=text_content, + reply_to=reply_to, + ) + for img_url, alt in images: + await self.send_image( + chat_id=event.source.chat_id, + image_url=img_url, + caption=alt or None, + ) + except Exception as e: + logger.error("[tlon] handle_message error: %s", e, exc_info=True) + + asyncio.create_task(_run()) + + async def send( + self, + chat_id: str, + content: str, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """ + Send a message to a Tlon channel or DM. + + chat_id: channel nest (e.g. "chat/~host/channel") or ship name for DMs + """ + if not self._sse or not self._sse._connected: + logger.error("[tlon] Send called but not connected!") + return SendResult(success=False, error="Not connected") + + try: + sent_at = int(time.time() * 1000) + + # Dedup: skip identical messages to the same chat within 30s + import hashlib + send_hash = hashlib.md5(f"{chat_id}:{content}".encode()).hexdigest() + now = time.time() + # Clean old entries + self._recent_sends = {k: v for k, v in self._recent_sends.items() if now - v < 30} + if send_hash in self._recent_sends: + logger.info("[tlon] Dedup: skipping duplicate send to %s (%d chars)", chat_id, len(content)) + return SendResult(success=True, message_id=f"dedup/{sent_at}") + self._recent_sends[send_hash] = now + + story = _text_to_story(content) + logger.info("[tlon] Sending to %s (%d chars, story=%d blocks)", + chat_id, len(content), len(story)) + + if chat_id.startswith("~"): + # DM โ€” pass reply_to for thread replies + # reply_to should be the parent writ-id (e.g. 
"~ship/170.141...") + await self._send_dm(chat_id, story, sent_at, reply_to=reply_to) + else: + # Channel post โ€” pass reply_to for thread replies + # reply_to should be the parent post ID (bare or @ud formatted) + formatted_reply = None + if reply_to: + # Format as @ud if it's a bare digit string + bare = str(reply_to).replace(".", "") + if bare.isdigit(): + formatted_reply = _format_ud(int(bare)) + else: + formatted_reply = str(reply_to) + await self._send_channel_post(chat_id, story, sent_at, reply_to=formatted_reply) + if formatted_reply: + self._participated_threads.add((chat_id, _normalize_post_id(formatted_reply))) + + msg_id = f"{self.ship_name}/{sent_at}" + logger.info("[tlon] โœ“ Message sent: %s", msg_id) + return SendResult(success=True, message_id=msg_id) + + except Exception as e: + logger.error("[tlon] Send failed: %s", e, exc_info=True) + return SendResult(success=False, error=str(e)) + + async def _send_dm( + self, + to_ship: str, + story: list, + sent_at: int, + reply_to: Optional[str] = None, + ) -> None: + """Send a DM via %chat poke.""" + to_ship = _normalize_ship(to_ship) + # Author uses ~ prefix (matching @tloncorp/api) + author = self.ship_name # e.g. 
"~timryd-macnus" + + # Build the writ ID: author/formatUd(da.fromUnix(sentAt)) + ud_time = _da_from_unix(sent_at) + writ_id = f"{author}/{ud_time}" + + if reply_to: + # DM reply uses "reply" delta with "memo" (not "essay") + delta = { + "reply": { + "id": writ_id, + "meta": None, + "delta": { + "add": { + "memo": { + "content": story, + "author": author, + "sent": sent_at, + }, + "time": None, + }, + }, + } + } + dm_json = { + "ship": to_ship, + "diff": { + "id": reply_to, + "delta": delta, + }, + } + else: + # Top-level DM uses "add" delta with "essay" + delta = { + "add": { + "essay": { + "content": story, + "author": author, + "sent": sent_at, + "kind": "/chat", + "meta": None, + "blob": None, + }, + "time": None, + } + } + dm_json = { + "ship": to_ship, + "diff": { + "id": writ_id, + "delta": delta, + }, + } + + logger.debug("[tlon] DM poke JSON: %s", json.dumps(dm_json)[:500]) + await self._sse.poke( + app="chat", + mark=os.getenv("TLON_DM_ACTION_MARK", "chat-dm-action-1"), + json_data=dm_json, + ) + + async def _send_channel_post( + self, + nest: str, + story: list, + sent_at: int, + reply_to: Optional[str] = None, + ) -> None: + """Send a post to a channel (chat, heap, diary).""" + # Author field WITH ~ prefix (matching @tloncorp/api convention) + author = self.ship_name if self.ship_name.startswith("~") else f"~{self.ship_name}" + + # Determine kind from nest type + kind = "/chat" + if nest.startswith("diary/"): + kind = "/diary" + elif nest.startswith("heap/"): + kind = "/heap" + + if reply_to: + # Channel reply: post.reply.action.add has flat fields + action_json = { + "channel": { + "nest": nest, + "action": { + "post": { + "reply": { + "id": reply_to, + "action": { + "add": { + "content": story, + "author": author, + "sent": sent_at, + } + }, + } + } + }, + } + } + else: + # Top-level post: post.add has essay fields directly (no wrapper) + action_json = { + "channel": { + "nest": nest, + "action": { + "post": { + "add": { + "content": story, + "author": 
author, + "sent": sent_at, + "kind": kind, + "meta": None, + "blob": None, + } + } + }, + } + } + + logger.debug("[tlon] Channel poke JSON: %s", json.dumps(action_json)[:500]) + await self._sse.poke( + app="channels", + mark=os.getenv("TLON_CHANNEL_ACTION_MARK", "channel-action-1"), + json_data=action_json, + ) + + async def send_image( + self, + chat_id: str, + image_url: str, + caption: Optional[str] = None, + reply_to: Optional[str] = None, + ) -> SendResult: + """Send an image as a Tlon story block with optional caption.""" + story = [] + if caption: + story.extend(_text_to_story(caption)) + # Add image block + story.append({ + "block": { + "image": { + "src": image_url, + "alt": caption or "", + "width": 0, + "height": 0, + } + } + }) + + sent_at = int(time.time() * 1000) + try: + if chat_id.startswith("~"): + await self._send_dm(chat_id, story, sent_at, reply_to) + else: + await self._send_channel_post(chat_id, story, sent_at, reply_to) + return SendResult(success=True, message_id=f"{self.ship_name}/{sent_at}") + except Exception as e: + return SendResult(success=False, error=str(e)) + + async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: + """Get info about a chat/channel.""" + if chat_id.startswith("~"): + return { + "name": chat_id, + "type": "dm", + "chat_id": chat_id, + } + parsed = _parse_channel_nest(chat_id) + return { + "name": parsed["name"] if parsed else chat_id, + "type": "group", + "chat_id": chat_id, + } + + def _is_bot_mentioned(self, text: str) -> bool: + """Check if the bot is mentioned in the text.""" + text_lower = text.lower() + # Check ship name mention + if self.ship_name.lower() in text_lower: + return True + # Check nickname mention + if self._bot_nickname and self._bot_nickname.lower() in text_lower: + return True + return False + + def _strip_bot_mention(self, text: str) -> str: + """Remove bot mentions from text.""" + # Remove ship name + text = re.sub( + re.escape(self.ship_name), + "", + text, + flags=re.IGNORECASE, + 
).strip() + # Remove nickname if set + if self._bot_nickname: + text = re.sub( + re.escape(self._bot_nickname), + "", + text, + flags=re.IGNORECASE, + ).strip() + return text + + def _mark_processed(self, msg_id: str) -> bool: + """ + Mark a message ID as processed. Returns True if this is new, + False if already processed (duplicate). + """ + if msg_id in self._processed_ids: + logger.info("[tlon] Dedup: already processed %s", msg_id[:40]) + return False + self._processed_ids.add(msg_id) + logger.info("[tlon] Dedup: marking new %s (total=%d)", msg_id[:40], len(self._processed_ids)) + # Trim old entries + if len(self._processed_ids) > self._max_processed: + # Remove oldest entries (set doesn't preserve order, but this is fine + # for dedup purposes - we just prevent unbounded growth) + excess = len(self._processed_ids) - self._max_processed + to_remove = list(self._processed_ids)[:excess] + for item in to_remove: + self._processed_ids.discard(item) + return True + + async def _prepare_media_context( + self, + *, + story_content: Any, + blob: Optional[str], + text: str, + ) -> Tuple[str, List[str], List[str], MessageType]: + """Download Tlon attachments and prepend blob annotations to text.""" + media_paths: List[str] = [] + media_types: List[str] = [] + notices: List[str] = [] + + try: + story_attachments = await download_story_images(story_content) + for item in story_attachments: + media_paths.append(item.path) + media_types.append(item.content_type) + except Exception as e: + logger.debug("[tlon] Story image download failed: %s", e) + + blob_entries = parse_blob_data(blob) + if blob_entries: + annotations = format_blob_annotations(blob_entries) + if annotations: + text = f"{annotations}\n{text}".strip() + try: + blob_attachments, blob_notices = await download_blob_attachments(blob_entries) + notices.extend(blob_notices) + for item in blob_attachments: + media_paths.append(item.path) + media_types.append(item.content_type) + except Exception as e: + 
logger.debug("[tlon] Blob download failed: %s", e) + + if notices: + text = f"{chr(10).join(notices)}\n{text}".strip() + + return text, media_paths, media_types, combined_message_type(media_types) + + def _is_owner(self, ship: str) -> bool: + return bool(self.owner_ship and _normalize_ship(ship) == self.owner_ship) + + def _is_blocked(self, ship: str) -> bool: + return _normalize_ship(ship) in self.blocked_ships + + def _is_channel_allowed(self, ship: str, nest: str) -> bool: + ship = _normalize_ship(ship) + if self._is_blocked(ship): + return False + if self._is_owner(ship): + return True + if self.allow_all or os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes"): + return True + global_users = os.getenv("GATEWAY_ALLOWED_USERS", "") + if global_users: + allowed = {_normalize_ship(s) for s in global_users.split(",") if s.strip()} + if ship in allowed: + return True + if self.allowed_users and ship in self.allowed_users: + return True + + rule = self.channel_rules.get(nest) or {} + mode = rule.get("mode") or ("restricted" if self.owner_ship else "open") + if mode == "open": + return True + + allowed = set(self.default_authorized_ships) + allowed.update(_normalize_ship(s) for s in rule.get("allowedShips", []) if isinstance(s, str)) + return ship in allowed + + async def _queue_approval(self, approval: PendingApproval) -> None: + if not self.owner_ship: + return + self.pending_approvals = prune_expired(self.pending_approvals) + if has_duplicate_pending( + self.pending_approvals, + approval_type=approval.type, + requesting_ship=approval.requesting_ship, + channel_nest=approval.channel_nest, + group_flag=approval.group_flag, + ): + logger.info("[tlon] Approval already pending for %s", approval.requesting_ship) + return + + self.pending_approvals.append(approval) + await self._put_settings_entry( + "pendingApprovals", + json.dumps([item.to_dict() for item in self.pending_approvals]), + ) + try: + await self.send(self.owner_ship, 
format_approval_request(approval)) + logger.info("[tlon] Queued approval %s for %s", approval.id, approval.requesting_ship) + except Exception as e: + logger.debug("[tlon] Failed to notify owner about approval %s: %s", approval.id, e) + + async def _handle_owner_command(self, sender: str, text: str) -> Optional[str]: + if not self._is_owner(sender) or not text.startswith("/"): + return None + + parts = text.strip().split() + command = parts[0].lower() + arg = parts[1] if len(parts) > 1 else None + + if command == "/pending": + return format_pending_list(self.pending_approvals) + if command in {"/blocked", "/banned"}: + return format_blocked_list(list(self.blocked_ships)) + if command in {"/unban", "/unblock"}: + if not arg: + return "Usage: /unban ~ship" + ship = _normalize_ship(arg) + self.blocked_ships.discard(ship) + self.dm_allowlist.discard(ship) + return f"Unblocked {ship}." + + action_by_command = { + "/allow": "approve", + "/approve": "approve", + "/reject": "deny", + "/deny": "deny", + "/ban": "block", + "/block": "block", + } + action = action_by_command.get(command) + if not action: + return None + + approval = find_pending_approval(self.pending_approvals, arg) + if not approval: + return "No matching pending Tlon approval." 
+ + return await self._execute_approval_action(approval, action) + + async def _execute_approval_action(self, approval: PendingApproval, action: str) -> str: + if action == "approve": + if approval.type == "dm": + self.dm_allowlist.add(approval.requesting_ship) + await self._put_settings_entry("dmAllowlist", sorted(self.dm_allowlist)) + elif approval.type == "channel" and approval.channel_nest: + rule = dict(self.channel_rules.get(approval.channel_nest) or {}) + allowed = { + _normalize_ship(s) + for s in rule.get("allowedShips", []) + if isinstance(s, str) + } + allowed.add(approval.requesting_ship) + rule["mode"] = "restricted" + rule["allowedShips"] = sorted(allowed) + self.channel_rules[approval.channel_nest] = rule + await self._put_settings_entry("channelRules", json.dumps(self.channel_rules)) + elif approval.type == "group" and approval.group_flag and self._sse: + await self._sse.poke( + app="groups", + mark="group-join", + json_data={"flag": approval.group_flag, "join-all": True}, + ) + await self._dispatch_pending_message(approval) + elif action == "block": + self.blocked_ships.add(approval.requesting_ship) + self.dm_allowlist.discard(approval.requesting_ship) + await self._put_settings_entry("dmAllowlist", sorted(self.dm_allowlist)) + + self.pending_approvals = [ + item for item in self.pending_approvals if item.id != approval.id + ] + await self._put_settings_entry( + "pendingApprovals", + json.dumps([item.to_dict() for item in self.pending_approvals]), + ) + return format_confirmation(approval, action) + + async def _dispatch_pending_message(self, approval: PendingApproval) -> None: + raw = approval.original_message or {} + if not raw: + return + source = self.build_source( + chat_id=str(raw.get("chat_id") or approval.requesting_ship), + chat_name=str(raw.get("chat_name") or raw.get("chat_id") or approval.requesting_ship), + chat_type=str(raw.get("chat_type") or "dm"), + user_id=approval.requesting_ship, + user_name=approval.requesting_ship, + 
thread_id=str(raw.get("thread_id")) if raw.get("thread_id") else None, + ) + event_obj = MessageEvent( + text=str(raw.get("text") or ""), + message_type=MessageType(str(raw.get("message_type") or MessageType.TEXT.value)), + source=source, + message_id=str(raw.get("message_id") or approval.id), + reply_to_message_id=( + str(raw.get("reply_to_message_id")) + if raw.get("reply_to_message_id") + else None + ), + timestamp=datetime.fromtimestamp(float(raw.get("timestamp") or time.time())), + media_urls=list(raw.get("media_urls") or []), + media_types=list(raw.get("media_types") or []), + ) + await self.handle_message(event_obj) + + async def _handle_group_invites(self, foreigns: Dict[str, Any]) -> None: + if not self._sse: + return + allowlist = { + _normalize_ship(s) + for s in (self._settings.group_invite_allowlist or []) + if s + } + for invite in pending_group_invites(foreigns): + inviter = _normalize_ship( + str(invite.get("ship") or invite.get("inviter") or invite.get("invitedBy") or "") + ) + if allowlist and inviter not in allowlist and not self._is_owner(inviter): + continue + group_flag = invite.get("groupFlag") + if not isinstance(group_flag, str): + continue + try: + await self._sse.poke( + app="groups", + mark="group-join", + json_data={"flag": group_flag, "join-all": True}, + ) + logger.info("[tlon] Auto-accepted group invite to %s", group_flag) + except Exception as e: + logger.debug("[tlon] Failed to accept group invite %s: %s", group_flag, e) + + async def _discover_channels(self) -> Set[str]: + """Discover channels from groups the bot is a member of.""" + discovered = TlonDiscovery() + try: + init_data = await self._sse.scry("/groups-ui/v7/init.json") + discovered = parse_groups_ui_init(init_data) + except Exception as e: + logger.debug("[tlon] groups-ui discovery failed: %s", e) + try: + groups = await self._sse.scry("/groups/v1/groups.json") + discovered = parse_legacy_groups(groups) + except Exception as legacy_e: + logger.debug("[tlon] legacy 
channel discovery failed: %s", legacy_e) + + self._channel_to_group.update(discovered.channel_to_group) + self._group_names.update(discovered.group_names) + + if self.auto_accept_group_invites and discovered.foreigns: + await self._handle_group_invites(discovered.foreigns) + + return discovered.channels + + async def _handle_channel_event(self, event: Any) -> None: + """ + Handle a channels firehose (/v2) event. + + Event structure for new posts: + { + "nest": "chat/~host/channel", + "response": { + "post": { + "id": "170141...", + "r-post": { + "set": { + "revision": "0", + "seal": { "id": "...", ... }, + "essay": { + "author": "~ship", + "sent": 1773..., + "kind": "/chat", + "content": [{"inline": ["text"]}], + ... + }, + "type": "post" + } + } + } + } + } + """ + try: + if not isinstance(event, dict): + return + + nest = event.get("nest") + if not nest: + return + + # Auto-watch channels from firehose + if nest not in self.monitored_channels: + if self.auto_discover and (nest.startswith("chat/") or nest.startswith("heap/")): + self.monitored_channels.add(nest) + logger.info("[tlon] Auto-watching channel: %s", nest) + else: + return + + response = event.get("response") + if not response: + return + + # Extract post data + post = response.get("post") + if not post or not isinstance(post, dict): + return + + msg_id = post.get("id") + r_post = post.get("r-post", {}) + if not r_post: + return + + # Two event shapes: + # 1) Top-level post: r-post.set.essay (type="post") + # 2) Thread reply: r-post.reply["r-reply"].set.memo + post_data = r_post.get("set") or {} + essay = post_data.get("essay") if isinstance(post_data, dict) else None + + reply_data = r_post.get("reply") + reply_memo = None + reply_id = None + is_thread_reply = False + if reply_data and isinstance(reply_data, dict): + reply_id = reply_data.get("id") + r_reply = reply_data.get("r-reply", {}) + if r_reply: + reply_set = r_reply.get("set") + if reply_set and isinstance(reply_set, dict): + reply_memo = 
reply_set.get("memo") or reply_set.get("essay") + is_thread_reply = True + + content = reply_memo or essay + if not content: + return + + effective_id = reply_id if is_thread_reply else msg_id + + event_type = "reply" if is_thread_reply else "post" + logger.info("[tlon] Channel event: nest=%s msg_id=%s type=%s is_reply=%s", + nest, msg_id, event_type, is_thread_reply) + + # Use lock to prevent race condition with concurrent event processing + async with self._process_lock: + if not effective_id or not self._mark_processed(str(effective_id)): + logger.info("[tlon] Channel dedup: skipping %s", effective_id) + return + # Lock released after this block โ€” but we've claimed the msg_id + + sender = _extract_author_ship(content.get("author")) + if not sender or sender == self.ship_name: + return + + # Get seal for thread context + if is_thread_reply: + reply_set = reply_data.get("r-reply", {}).get("set", {}) + seal = reply_set.get("seal", {}) if isinstance(reply_set, dict) else {} + else: + seal = post_data.get("seal", {}) if isinstance(post_data, dict) else {} + parent_id = seal.get("parent-id") or seal.get("parent") + + raw_text = _extract_message_text(content.get("content")) + text, media_urls, media_types, message_type = await self._prepare_media_context( + story_content=content.get("content"), + blob=content.get("blob"), + text=raw_text, + ) + if not text.strip() and not media_urls: + return + + logger.info("[tlon] Channel msg from %s in %s: %s", + sender, nest, text[:80]) + + mentioned = self._is_bot_mentioned(text) + thread_key = (nest, _normalize_post_id(parent_id)) if parent_id else None + in_participated_thread = bool(thread_key and thread_key in self._participated_threads) + owner_blob_only = bool(self._is_owner(sender) and media_urls and not raw_text.strip()) + + # In group channels, respond to mentions, participated threads, or + # owner blob-only messages. 
+ if not (mentioned or in_participated_thread or owner_blob_only): + logger.debug("[tlon] Not mentioned, ignoring") + return + + # Check user authorization + if not self._is_channel_allowed(sender, nest): + logger.info("[tlon] Unauthorized user %s in %s", sender, nest) + if self.owner_ship: + approval = create_pending_approval( + approval_type="channel", + requesting_ship=sender, + channel_nest=nest, + existing_ids=[item.id for item in self.pending_approvals], + message_preview=text[:200], + original_message={ + "chat_id": nest, + "chat_name": (_parse_channel_nest(nest) or {}).get("name", nest), + "chat_type": "group", + "text": self._strip_bot_mention(text) if mentioned else text, + "message_id": str(effective_id), + "reply_to_message_id": str(parent_id) if parent_id else None, + "thread_id": str(parent_id) if parent_id else None, + "timestamp": content.get("sent", time.time() * 1000) / 1000, + "media_urls": media_urls, + "media_types": media_types, + "message_type": message_type.value, + }, + ) + await self._queue_approval(approval) + return + + # Strip bot mention from text + clean_text = self._strip_bot_mention(text) if mentioned else text + logger.info("[tlon] Processing message from %s: %s", sender, clean_text[:80]) + + # Build message event + parsed = _parse_channel_nest(nest) + source = self.build_source( + chat_id=nest, + chat_name=parsed["name"] if parsed else nest, + chat_type="group", + user_id=sender, + user_name=sender, + thread_id=str(parent_id) if parent_id else None, + ) + + event_obj = MessageEvent( + text=clean_text, + message_type=message_type, + source=source, + message_id=str(effective_id), + reply_to_message_id=str(parent_id) if parent_id else None, + timestamp=datetime.fromtimestamp( + content.get("sent", time.time() * 1000) / 1000 + ), + media_urls=media_urls, + media_types=media_types, + ) + + if parent_id: + self._participated_threads.add((nest, _normalize_post_id(parent_id))) + await self.handle_message(event_obj) + + except Exception as 
e: + logger.error("[tlon] Channel event error: %s", e, exc_info=True) + + async def _handle_dm_event(self, event: Any) -> None: + """Handle a chat firehose (/v3) event.""" + try: + logger.info("[tlon] _handle_dm_event called, keys=%s", + list(event.keys()) if isinstance(event, dict) else type(event).__name__) + # Handle DM invite arrays + if isinstance(event, list): + for invite in event: + if isinstance(invite, dict): + ship = _normalize_ship(str(invite.get("ship") or "")) + elif isinstance(invite, str): + ship = _normalize_ship(invite) + else: + continue + if not ship or ship in self._processed_dm_invites: + continue + if ship and ( + self.auto_accept_dm_invites + or self._is_owner(ship) + or self._is_user_allowed(ship, is_dm=True) + ): + try: + await self._sse.poke( + app="chat", + mark="chat-dm-rsvp", + json_data={"ship": ship.lstrip("~"), "ok": True}, + ) + logger.info("[tlon] Auto-accepted DM invite from %s", ship) + except Exception as e: + logger.error("[tlon] Failed to accept DM from %s: %s", ship, e) + self._processed_dm_invites.add(ship) + elif self.owner_ship: + approval = create_pending_approval( + approval_type="dm", + requesting_ship=ship, + existing_ids=[item.id for item in self.pending_approvals], + message_preview="(DM invite - no message yet)", + ) + await self._queue_approval(approval) + self._processed_dm_invites.add(ship) + return + + if not isinstance(event, dict): + return + + if "whom" not in event or "response" not in event: + return + + whom = event["whom"] + msg_id = event.get("id") + response = event["response"] + + # Extract message content + essay = response.get("add", {}).get("essay") if isinstance(response.get("add"), dict) else None + dm_reply_memo = None + dm_reply = response.get("reply") + if isinstance(dm_reply, dict): + dm_reply_memo = (dm_reply.get("delta", {}) + .get("add", {}) + .get("memo")) + + content = essay or dm_reply_memo + if not content: + return + + is_thread_reply = bool(dm_reply_memo) + effective_id = msg_id + if 
is_thread_reply and dm_reply: + effective_id = ( + dm_reply.get("id") + or dm_reply.get("delta", {}).get("add", {}).get("id") + or ( + f"{_extract_author_ship(content.get('author'))}/{_da_from_unix(content.get('sent'))}" + if content.get("author") and content.get("sent") + else msg_id + ) + ) + + async with self._process_lock: + if not effective_id or not self._mark_processed(str(effective_id)): + return + + sender = _extract_author_ship(content.get("author")) + # Extract DM partner from whom field + partner = _normalize_ship(whom) if isinstance(whom, str) else "" + + logger.info("[tlon] DM event: whom=%s, sender=%s, self=%s", + whom, sender, self.ship_name) + + # Skip our own messages (author == us) + if sender == self.ship_name: + logger.info("[tlon] Skipping own DM message") + return + + # Use partner for routing, author for identity + effective_sender = partner or sender + if not effective_sender: + return + + raw_text = _extract_message_text(content.get("content")) + text, media_urls, media_types, message_type = await self._prepare_media_context( + story_content=content.get("content"), + blob=content.get("blob"), + text=raw_text, + ) + if not text.strip() and not media_urls: + return + + owner_command_response = await self._handle_owner_command(sender, text) + if owner_command_response is not None: + await self.send(effective_sender, owner_command_response, reply_to=msg_id) + return + + # Check DM authorization (includes dm_allowlist) + if not self._is_user_allowed(effective_sender, is_dm=True): + logger.info("[tlon] Unauthorized DM from %s", effective_sender) + if self.owner_ship: + approval = create_pending_approval( + approval_type="dm", + requesting_ship=effective_sender, + existing_ids=[item.id for item in self.pending_approvals], + message_preview=text[:200], + original_message={ + "chat_id": effective_sender, + "chat_name": effective_sender, + "chat_type": "dm", + "text": text, + "message_id": str(effective_id), + "reply_to_message_id": str(msg_id) if 
is_thread_reply else None, + "thread_id": str(msg_id) if is_thread_reply else None, + "timestamp": content.get("sent", time.time() * 1000) / 1000, + "media_urls": media_urls, + "media_types": media_types, + "message_type": message_type.value, + }, + ) + await self._queue_approval(approval) + return + + # Build message event + source = self.build_source( + chat_id=effective_sender, + chat_name=effective_sender, + chat_type="dm", + user_id=effective_sender, + user_name=effective_sender, + thread_id=str(msg_id) if is_thread_reply else None, + ) + + event_obj = MessageEvent( + text=text, + message_type=message_type, + source=source, + message_id=str(effective_id), + reply_to_message_id=str(msg_id) if is_thread_reply else None, + timestamp=datetime.fromtimestamp( + content.get("sent", time.time() * 1000) / 1000 + ), + media_urls=media_urls, + media_types=media_types, + ) + + logger.info("[tlon] >>> Calling handle_message for DM from %s, msg_id=%s", effective_sender, effective_id) + await self.handle_message(event_obj) + + except Exception as e: + logger.error("[tlon] DM event error: %s", e, exc_info=True) + + def _is_user_allowed(self, ship: str, is_dm: bool = False) -> bool: + """Check if a ship is authorized to interact with the bot.""" + ship = _normalize_ship(ship) + if self._is_blocked(ship): + return False + if self._is_owner(ship): + return True + + # Global allow-all + global_allow = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes") + if global_allow or self.allow_all: + return True + + # Check global allowlist + global_users = os.getenv("GATEWAY_ALLOWED_USERS", "") + if global_users: + allowed = set(_normalize_ship(s) for s in global_users.split(",") if s.strip()) + if ship in allowed: + return True + + # Check Tlon-specific allowlist + if self.allowed_users and ship in self.allowed_users: + return True + + if self.default_authorized_ships and ship in self.default_authorized_ships: + return True + + # Check DM-specific allowlist + if 
@dataclass
class PendingApproval:
    """One access request that is waiting for a decision.

    Instances round-trip through ``to_dict`` / ``from_dict``. ``to_dict``
    writes camelCase keys and a millisecond timestamp; ``from_dict`` accepts
    both camelCase and snake_case keys and either seconds or milliseconds.
    """

    id: str
    type: str
    requesting_ship: str
    channel_nest: Optional[str] = None
    group_flag: Optional[str] = None
    group_title: Optional[str] = None
    message_preview: Optional[str] = None
    original_message: Dict[str, Any] = field(default_factory=dict)
    # Creation time in seconds since the epoch.
    timestamp: float = field(default_factory=time.time)
    notification_message_id: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a camelCase dict with ``timestamp`` in milliseconds."""
        return {
            "id": self.id,
            "type": self.type,
            "requestingShip": self.requesting_ship,
            "channelNest": self.channel_nest,
            "groupFlag": self.group_flag,
            "groupTitle": self.group_title,
            "messagePreview": self.message_preview,
            # An empty original message is stored as None rather than {}.
            "originalMessage": self.original_message or None,
            "timestamp": int(self.timestamp * 1000),
            "notificationMessageId": self.notification_message_id,
        }

    @classmethod
    def from_dict(cls, raw: Dict[str, Any]) -> "PendingApproval":
        """Build an approval from a stored dict.

        Args:
            raw: Mapping using camelCase or snake_case keys.

        Returns:
            A ``PendingApproval``. A missing ``id`` is generated, a missing
            ``type`` defaults to ``"dm"``, and a missing, zero, or unparseable
            ``timestamp`` falls back to the current time instead of raising.
        """
        approval_type = str(raw.get("type") or "dm")

        # Accept ints, floats and numeric strings; anything else means "unknown".
        raw_timestamp = raw.get("timestamp")
        try:
            seconds = float(raw_timestamp) if raw_timestamp else 0.0
        except (TypeError, ValueError):
            seconds = 0.0
        # Values this large can only be milliseconds (as written by to_dict).
        if seconds > 10_000_000_000:
            seconds /= 1000

        return cls(
            id=str(raw.get("id") or _generate_id(approval_type, [])),
            type=approval_type,
            requesting_ship=str(raw.get("requestingShip") or raw.get("requesting_ship") or ""),
            channel_nest=raw.get("channelNest") or raw.get("channel_nest"),
            group_flag=raw.get("groupFlag") or raw.get("group_flag"),
            group_title=raw.get("groupTitle") or raw.get("group_title"),
            message_preview=raw.get("messagePreview") or raw.get("message_preview"),
            original_message=raw.get("originalMessage") or raw.get("original_message") or {},
            timestamp=seconds or time.time(),
            # Same camelCase/snake_case fallback as every other field.
            notification_message_id=(
                raw.get("notificationMessageId") or raw.get("notification_message_id")
            ),
        )
def format_approval_request(approval: PendingApproval) -> str:
    """Render the notification text that asks for a decision on ``approval``.

    The message is a headline chosen by ``approval.type``, an optional quoted
    message preview (truncated to 140 characters), the approval id, and the
    three commands that act on it.
    """
    ship = approval.requesting_ship
    kind = approval.type

    if kind == "dm":
        headline = f"DM request from {ship}"
    elif kind == "channel":
        place = approval.channel_nest or "a channel"
        headline = f"{ship} mentioned Hermes in {place}"
    else:
        # Any other type is presented as a group invite.
        target = approval.group_title or approval.group_flag or "a group"
        headline = f"Group invite from {ship} to join {target}"

    quoted = ""
    if approval.message_preview:
        # The leading newline leaves a blank line between headline and quote.
        quoted = '\n"' + _truncate(approval.message_preview, 140) + '"'

    parts = [headline, quoted, ""]
    parts.append(f"Pending approval id: {approval.id}")
    parts.append("")
    parts.append("Use one of:")
    for command in ("allow", "reject", "ban"):
        parts.append(f"/{command} {approval.id}")
    return "\n".join(parts)
+ return "Blocked Tlon ships:\n" + "\n".join(f"- {ship}" for ship in sorted(blocked)) + + +def format_confirmation(approval: PendingApproval, action: str) -> str: + ship = approval.requesting_ship + if action == "approve": + if approval.type == "dm": + return f"Approved DM access for {ship}." + if approval.type == "channel": + return f"Approved {ship} in {approval.channel_nest}." + return f"Approved group invite from {ship}." + if action == "block": + return f"Blocked {ship}." + return f"Rejected request from {ship}." + + +def _generate_id(approval_type: str, existing_ids: List[str]) -> str: + prefix = (approval_type or "x")[0] + for _ in range(10): + approval_id = f"{prefix}{uuid.uuid4().hex[:4]}" + if approval_id not in existing_ids: + return approval_id + return f"{prefix}{uuid.uuid4().hex[:8]}" + + +def _truncate(text: Optional[str], max_len: int) -> str: + text = text or "" + if len(text) <= max_len: + return text + return text[: max_len - 3] + "..." diff --git a/gateway/platforms/tlon_discovery.py b/gateway/platforms/tlon_discovery.py new file mode 100644 index 000000000000..1271198092d0 --- /dev/null +++ b/gateway/platforms/tlon_discovery.py @@ -0,0 +1,97 @@ +"""Discovery helpers for Tlon groups and channels.""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, Set + + +@dataclass +class TlonDiscovery: + channels: Set[str] = field(default_factory=set) + channel_to_group: Dict[str, str] = field(default_factory=dict) + group_names: Dict[str, str] = field(default_factory=dict) + foreigns: Dict[str, Any] = field(default_factory=dict) + + +def parse_groups_ui_init(raw: Any) -> TlonDiscovery: + """Parse ``/groups-ui/v7/init.json`` style data.""" + result = TlonDiscovery() + if not isinstance(raw, dict): + return result + + groups = raw.get("groups") + if isinstance(groups, dict): + for group_flag, group_data in groups.items(): + if not isinstance(group_flag, str) or not isinstance(group_data, dict): + 
def pending_group_invites(foreigns: Dict[str, Any]) -> list[dict[str, Any]]:
    """Return valid pending foreign group invites from groups-ui foreign data.

    Args:
        foreigns: Mapping of group flag to that group's foreign data. Anything
            that is not a dict (including ``None``) yields an empty list.

    Returns:
        A copy of every invite dict whose ``"valid"`` value is truthy, with
        ``"groupFlag"`` added. When the group's ``"preview"`` is a dict,
        ``"groupTitle"`` is added too (``None`` if the preview has no dict
        ``"meta"``).
    """
    invites: list[dict[str, Any]] = []
    if not isinstance(foreigns, dict):
        return invites

    for group_flag, data in foreigns.items():
        if not isinstance(data, dict):
            continue
        raw_invites = data.get("invites")
        if not isinstance(raw_invites, list):
            continue

        # The title depends only on the group, so resolve it once per group.
        preview = data.get("preview")
        has_preview = isinstance(preview, dict)
        group_title = None
        if has_preview:
            meta = preview.get("meta")
            group_title = meta.get("title") if isinstance(meta, dict) else None

        for invite in raw_invites:
            if not isinstance(invite, dict) or not invite.get("valid"):
                continue
            item = dict(invite)
            item["groupFlag"] = group_flag
            if has_preview:
                item["groupTitle"] = group_title
            invites.append(item)
    return invites
def extract_image_blocks(content: Any) -> List[Tuple[str, str]]:
    """Return ``(url, alt)`` pairs from Story image blocks.

    Args:
        content: Story content. Only a list is inspected; anything else
            produces an empty result.

    Returns:
        One ``(src, alt)`` tuple per verse whose ``block.image.src`` is a
        non-empty string, in input order. ``alt`` is ``""`` when it is missing
        or not a string. Malformed verses are skipped rather than raising.
    """
    if not isinstance(content, list):
        return []

    images: List[Tuple[str, str]] = []
    for verse in content:
        if not isinstance(verse, dict):
            continue
        block = verse.get("block")
        # A truthy non-dict "block" (string, list, ...) must not abort parsing.
        if not isinstance(block, dict):
            continue
        image = block.get("image")
        if not isinstance(image, dict):
            continue
        src = image.get("src")
        if isinstance(src, str) and src:
            alt = image.get("alt")
            images.append((src, alt if isinstance(alt, str) else ""))
    return images
async def download_media_url(
    url: str,
    *,
    filename_hint: Optional[str] = None,
    fallback_content_type: Optional[str] = None,
    max_bytes: int = MAX_BLOB_DOWNLOAD_BYTES,
) -> Optional[TlonDownloadedAttachment]:
    """Download a safe HTTP(S) URL into the right Hermes cache.

    Redirects are followed manually (at most five hops) and every hop is
    re-checked with ``_is_safe_http_url``; otherwise a permitted public URL
    could redirect the download to an address the guard would have rejected.
    The guard performs a blocking DNS lookup, so it runs in the default
    executor instead of on the event loop.

    Args:
        url: Attachment URL taken from the post.
        filename_hint: Preferred file name for non-image, non-audio payloads.
        fallback_content_type: Used when the response has no ``content-type``.
        max_bytes: Upper bound on the payload, enforced against both the
            declared ``content-length`` and the bytes actually received.

    Returns:
        The cached attachment, or ``None`` when ``aiohttp`` is unavailable,
        any hop is rejected by the guard, the final status is not 200, the
        redirect limit is exceeded, the payload is too large, or any network
        or cache error occurs. This function does not raise.
    """
    # Local imports keep the module's top-level dependency list unchanged.
    import asyncio
    from urllib.parse import urljoin

    try:
        import aiohttp
    except ImportError:
        return None

    redirect_statuses = (301, 302, 303, 307, 308)
    max_redirects = 5
    current_url = url
    content_type = ""
    chunks: List[bytes] = []

    try:
        loop = asyncio.get_running_loop()
        timeout = aiohttp.ClientTimeout(total=45)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            for _hop in range(max_redirects + 1):
                is_safe = await loop.run_in_executor(
                    None, _is_safe_http_url, current_url
                )
                if not is_safe:
                    return None

                async with session.get(current_url, allow_redirects=False) as resp:
                    if resp.status in redirect_statuses:
                        location = resp.headers.get("location")
                        if not location:
                            return None
                        # Location may be relative to the current URL.
                        current_url = urljoin(current_url, location)
                        continue
                    if resp.status != 200:
                        return None

                    content_type = (
                        resp.headers.get("content-type")
                        or fallback_content_type
                        or "application/octet-stream"
                    )
                    declared = resp.headers.get("content-length")
                    if declared and declared.isdigit() and int(declared) > max_bytes:
                        return None

                    # content-length can be absent or wrong, so count as we read.
                    total = 0
                    async for chunk in resp.content.iter_chunked(1024 * 64):
                        total += len(chunk)
                        if total > max_bytes:
                            return None
                        chunks.append(chunk)
                    break
            else:
                # Redirect budget exhausted without reaching a final response.
                return None
    except Exception:
        return None

    data = b"".join(chunks)
    media_type = content_type.split(";", 1)[0].strip().lower()
    ext = _extension_for(media_type, url)
    filename = _safe_filename(filename_hint, ext)

    try:
        if media_type.startswith("image/"):
            path = cache_image_from_bytes(data, ext=ext)
        elif media_type.startswith("audio/"):
            path = cache_audio_from_bytes(data, ext=ext)
        else:
            path = cache_document_from_bytes(data, filename)
    except Exception:
        return None

    return TlonDownloadedAttachment(path=path, content_type=media_type)
_is_safe_http_url(url: str) -> bool: + """Conservative SSRF guard for attachment downloads.""" + try: + parsed = urlparse(url) + except Exception: + return False + + if parsed.scheme not in {"http", "https"} or not parsed.hostname: + return False + + if os.getenv("TLON_ALLOW_PRIVATE_MEDIA_URLS", "").lower() in { + "1", + "true", + "yes", + }: + return True + + host = parsed.hostname + try: + infos = socket.getaddrinfo(host, parsed.port or 443, type=socket.SOCK_STREAM) + except socket.gaierror: + return False + + for info in infos: + sockaddr = info[4] + ip = ipaddress.ip_address(sockaddr[0]) + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_multicast + or ip.is_reserved + or ip.is_unspecified + ): + return False + return True + + +def _extension_for(content_type: str, url: str) -> str: + ext = mimetypes.guess_extension(content_type) or "" + if ext == ".jpe": + ext = ".jpg" + if ext: + return ext + suffix = Path(urlparse(url).path).suffix + return suffix if suffix else ".bin" + + +def _safe_filename(filename_hint: Optional[str], ext: str) -> str: + if filename_hint: + name = Path(filename_hint).name.replace("\x00", "").strip() + if name and name not in {".", ".."}: + return name + return f"tlon-attachment{ext}" + + +def _format_size(size: Optional[int]) -> str: + if size is None: + return "unknown size" + if size < 1024: + return f"{size}B" + if size < 1024 * 1024: + return f"{round(size / 1024)}KB" + return f"{size / (1024 * 1024):.1f}MB" + + +def _too_large_notice(entry: TlonBlobEntry, size: int, max_bytes: int) -> str: + label = entry.name or ("voice memo" if entry.type == "voicememo" else "blob") + return ( + f"[blob not downloaded: {label} is {_format_size(size)}, " + f"over the {_format_size(max_bytes)} limit]" + ) diff --git a/gateway/platforms/tlon_settings.py b/gateway/platforms/tlon_settings.py new file mode 100644 index 000000000000..e723a1d95202 --- /dev/null +++ b/gateway/platforms/tlon_settings.py @@ -0,0 +1,177 @@ 
def parse_settings_event(event: Any) -> Optional[tuple[str, Any]]:
    """Turn a settings-store subscription event into a ``(key, value)`` pair.

    The payload may be wrapped in a ``"settings-event"`` dict or passed bare.
    Only ``put-entry`` / ``del-entry`` records addressed to ``SETTINGS_DESK``
    and ``SETTINGS_BUCKET`` are recognised; a put is checked before a delete.

    Returns:
        ``(entry_key, value)`` for a put, ``(entry_key, None)`` for a delete,
        and ``None`` for anything else, including a matching record whose
        ``entry-key`` is empty.
    """
    if not isinstance(event, dict):
        return None

    wrapped = event.get("settings-event")
    payload = wrapped if isinstance(wrapped, dict) else event

    for action in ("put-entry", "del-entry"):
        entry = payload.get(action)
        if not isinstance(entry, dict):
            continue
        if entry.get("desk") != SETTINGS_DESK or entry.get("bucket-key") != SETTINGS_BUCKET:
            continue

        entry_key = entry.get("entry-key")
        if not entry_key:
            return None
        if action == "put-entry":
            return (str(entry_key), entry.get("value"))
        return (str(entry_key), None)

    return None
_string_list(value) + elif key == "channelRules": + data["channel_rules"] = _channel_rules(value) + elif key == "defaultAuthorizedShips": + data["default_authorized_ships"] = _string_list(value) + elif key == "ownerShip": + data["owner_ship"] = value if isinstance(value, str) else None + elif key == "pendingApprovals": + data["pending_approvals"] = _pending_approvals(value) + elif key == "showModelSig": + data["show_model_signature"] = _bool_or_none(value) + + return TlonSettings(**data) + + +def _extract_bucket(raw: Any) -> Dict[str, Any]: + if not isinstance(raw, dict): + return {} + all_data = raw.get("all") + if isinstance(all_data, dict): + desk = all_data.get(SETTINGS_DESK) + else: + desk = raw.get(SETTINGS_DESK) if SETTINGS_DESK in raw else raw + if not isinstance(desk, dict): + return {} + bucket = desk.get(SETTINGS_BUCKET) + return bucket if isinstance(bucket, dict) else {} + + +def _string_list(value: Any) -> Optional[List[str]]: + if value is None: + return None + if not isinstance(value, list): + return None + return [item for item in value if isinstance(item, str)] + + +def _bool_or_none(value: Any) -> Optional[bool]: + return value if isinstance(value, bool) else None + + +def _channel_rules(value: Any) -> Dict[str, Dict[str, Any]]: + if isinstance(value, str): + try: + value = json.loads(value) + except ValueError: + return {} + if not isinstance(value, dict): + return {} + + rules: Dict[str, Dict[str, Any]] = {} + for channel, rule in value.items(): + if not isinstance(channel, str) or not isinstance(rule, dict): + continue + mode = rule.get("mode") + ships = rule.get("allowedShips") + parsed: Dict[str, Any] = {} + if mode in {"open", "restricted"}: + parsed["mode"] = mode + if isinstance(ships, list): + parsed["allowedShips"] = [s for s in ships if isinstance(s, str)] + rules[channel] = parsed + return rules + + +def _pending_approvals(value: Any) -> Optional[List[Dict[str, Any]]]: + if isinstance(value, str): + try: + value = json.loads(value) + 
except ValueError: + return None + if not isinstance(value, list): + return None + result: List[Dict[str, Any]] = [] + for item in value: + if isinstance(item, dict) and isinstance(item.get("id"), str): + result.append(item) + return result diff --git a/gateway/platforms/yuanbao.py b/gateway/platforms/yuanbao.py index d79da7856ae4..7015e0c848cf 100644 --- a/gateway/platforms/yuanbao.py +++ b/gateway/platforms/yuanbao.py @@ -147,6 +147,9 @@ r"\[(image|voice|video|file(?::[^|\]]*)?)\|ybres:([A-Za-z0-9_\-]+)\]" ) +# Media kinds that can be resolved and injected into the model context +_RESOLVABLE_MEDIA_KINDS = frozenset({"image", "file"}) + # Strip page indicators like (1/3) appended by BasePlatformAdapter _INDICATOR_RE = re.compile(r'\s*\(\d+/\d+\)$') @@ -925,6 +928,7 @@ class InboundContext: # Populated by QuoteContextMiddleware reply_to_message_id: Optional[str] = None reply_to_text: Optional[str] = None + quote_media_refs: list = dc_field(default_factory=list) # List of (rid, kind, filename) # Populated by MediaResolveMiddleware media_urls: list = dc_field(default_factory=list) @@ -1645,6 +1649,25 @@ def _format_link_understanding(custom: dict) -> Optional[str]: return None return f"[link: {link} | visit link for full content]" + @staticmethod + def _parse_resource_id(url: str) -> str: + """Extract resourceId from Yuanbao resource URL query parameters. + + Args: + url: Resource URL (e.g., https://...?resourceId=abc123) + + Returns: + Resource ID string, or empty string if not found + """ + if not url: + return "" + try: + query = urllib.parse.parse_qs(urllib.parse.urlparse(url).query) + ids = query.get("resourceId") or query.get("resourceid") or [] + return str(ids[0]).strip() if ids else "" + except Exception: + return "" + @classmethod def _extract_text(cls, msg_body: list) -> str: """Extract plain text content from MsgBody. 
@@ -1668,14 +1691,35 @@ def _extract_text(cls, msg_body: list) -> str: if text: parts.append(text) elif elem_type == "TIMImageElem": - parts.append("[image]") + # Extract resourceId from image_info_array URL + image_info_array = content.get("image_info_array") + if not isinstance(image_info_array, list): + image_info_array = [] + image_info = None + # Prefer medium image (index 1), fallback to index 0 + if len(image_info_array) > 1 and isinstance(image_info_array[1], dict): + image_info = image_info_array[1] + elif len(image_info_array) > 0 and isinstance(image_info_array[0], dict): + image_info = image_info_array[0] + image_url = str((image_info or {}).get("url") or "").strip() + rid = cls._parse_resource_id(image_url) + parts.append(f"[image|ybres:{rid}]" if rid else "[image]") elif elem_type == "TIMFileElem": filename = content.get("file_name", content.get("fileName", content.get("filename", ""))) - parts.append(f"[file: {filename}]" if filename else "[file]") + file_url = str(content.get("url") or "").strip() + rid = cls._parse_resource_id(file_url) + if rid: + parts.append(f"[file:{filename}|ybres:{rid}]" if filename else f"[file|ybres:{rid}]") + else: + parts.append(f"[file: {filename}]" if filename else "[file]") elif elem_type == "TIMSoundElem": - parts.append("[voice]") + sound_url = str(content.get("url") or "").strip() + rid = cls._parse_resource_id(sound_url) + parts.append(f"[voice|ybres:{rid}]" if rid else "[voice]") elif elem_type == "TIMVideoFileElem": - parts.append("[video]") + video_url = str(content.get("url") or "").strip() + rid = cls._parse_resource_id(video_url) + parts.append(f"[video|ybres:{rid}]" if rid else "[video]") elif elem_type == "TIMCustomElem": data_val = content.get("data", "") if data_val: @@ -2132,22 +2176,23 @@ class QuoteContextMiddleware(InboundMiddleware): name = "quote-context" @staticmethod - def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str]]: + def 
_extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optional[str], list]: """Extract quote context, mapping to MessageEvent.reply_to_*. Returns: - (reply_to_message_id, reply_to_text) + (reply_to_message_id, reply_to_text, quote_media_refs) + where quote_media_refs is a list of (rid, kind, filename) tuples """ if not cloud_custom_data: - return None, None + return None, None, [] try: parsed = json.loads(cloud_custom_data) except (json.JSONDecodeError, TypeError): - return None, None + return None, None, [] quote = parsed.get("quote") if isinstance(parsed, dict) else None if not isinstance(quote, dict): - return None, None + return None, None, [] # type=2 corresponds to image reference; desc may be empty, provide a placeholder. quote_type = int(quote.get("type") or 0) @@ -2155,15 +2200,26 @@ def _extract_quote_context(cloud_custom_data: str) -> Tuple[Optional[str], Optio if quote_type == 2 and not desc: desc = "[image]" if not desc: - return None, None + return None, None, [] quote_id = str(quote.get("id") or "").strip() or None sender = str(quote.get("sender_nickname") or quote.get("sender_id") or "").strip() quote_text = f"{sender}: {desc}" if sender else desc - return quote_id, quote_text + + # Extract media references from desc using _YB_RES_REF_RE regex + media_refs: list = [] + for m in _YB_RES_REF_RE.finditer(desc): + head = m.group(1) # "image" | "file:" | "voice" | "video" + rid = m.group(2) + kind, _, filename = head.partition(":") + kind = kind.strip() + media_refs.append((rid, kind, filename.strip())) + + return quote_id, quote_text, media_refs async def handle(self, ctx: InboundContext, next_fn) -> None: - ctx.reply_to_message_id, ctx.reply_to_text = self._extract_quote_context(ctx.cloud_custom_data) + ctx.reply_to_message_id, ctx.reply_to_text, ctx.quote_media_refs = self._extract_quote_context(ctx.cloud_custom_data) + await next_fn() @@ -2332,7 +2388,7 @@ async def _resolve_media_urls( for ref in media_refs: kind = 
str(ref.get("kind") or "").strip().lower() url = str(ref.get("url") or "").strip() - if kind not in {"image", "file"} or not url: + if kind not in _RESOLVABLE_MEDIA_KINDS or not url: continue try: @@ -2391,7 +2447,7 @@ async def _collect_observed_media( rid = m.group(2) kind, _, filename = head.partition(":") kind = kind.strip() - if kind not in {"image", "file"}: + if kind not in _RESOLVABLE_MEDIA_KINDS: continue if rid in seen: continue @@ -2458,26 +2514,82 @@ async def _dispatch_inbound_event() -> None: media_urls = list(ctx.media_urls) media_types = list(ctx.media_types) - # Backfill observed media from recent transcript history - extra_img_urls: List[str] = [] - extra_img_mimes: List[str] = [] - try: - extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media( - adapter, ctx.source, - ) - except Exception as exc: - logger.warning( - "[%s] observed-image hydration raised, continuing anyway: %s", - adapter.name, exc, - ) - if extra_img_urls: - current = set(media_urls) - for u, m in zip(extra_img_urls, extra_img_mimes): - if u in current: + # If user quoted a message (reply_to_message_id is set), resolve only + # quote_media_refs to avoid injecting unrelated history media. + # Otherwise, backfill observed media from recent transcript history. 
+ if ctx.reply_to_message_id is not None: + # Fallback: if desc didn't contain ybres refs, look up transcript + if not ctx.quote_media_refs: + try: + store = getattr(adapter, "_session_store", None) + if store: + session_entry = store.get_or_create_session(ctx.source) + history = store.load_transcript(session_entry.session_id) + for msg in reversed(history or []): + mid = msg.get("message_id", "") + if mid and mid == ctx.reply_to_message_id: + _content = msg.get("content", "") + if isinstance(_content, str) and "|ybres:" in _content: + for m in _YB_RES_REF_RE.finditer(_content): + head = m.group(1) + rid = m.group(2) + kind, _, filename = head.partition(":") + kind = kind.strip() + if kind in _RESOLVABLE_MEDIA_KINDS: + ctx.quote_media_refs.append((rid, kind, filename.strip())) + break + except Exception as exc: + logger.warning( + "[%s] quote transcript lookup failed: %s", + adapter.name, exc, + ) + # User quoted a message โ€” resolve only media from the quote + for rid, kind, filename in ctx.quote_media_refs: + if kind not in _RESOLVABLE_MEDIA_KINDS: continue - media_urls.append(u) - media_types.append(m) - current.add(u) + try: + fresh_url = await MediaResolveMiddleware._resolve_by_resource_id(adapter, rid) + except Exception as exc: + logger.warning( + "[%s] quote media resolve failed: rid=%s kind=%s err=%s", + adapter.name, rid, kind, exc, + ) + continue + cached = await MediaResolveMiddleware._download_and_cache( + adapter, + fetch_url=fresh_url, + kind=kind, + file_name=filename or None, + log_tag=f"quote rid={rid}", + ) + if cached is None: + continue + path, mime = cached + # Avoid duplicates + if path not in media_urls: + media_urls.append(path) + media_types.append(mime) + else: + # No quote โ€” backfill observed media from recent transcript history + extra_img_urls: List[str] = [] + extra_img_mimes: List[str] = [] + try: + extra_img_urls, extra_img_mimes = await MediaResolveMiddleware._collect_observed_media( + adapter, ctx.source, + ) + except Exception 
as exc: + logger.warning( + "[%s] observed-image hydration raised, continuing anyway: %s", + adapter.name, exc, + ) + if extra_img_urls: + current = set(media_urls) + for u, m in zip(extra_img_urls, extra_img_mimes): + if u in current: + continue + media_urls.append(u) + media_types.append(m) + current.add(u) # Replace [kind|ybres:xxx] anchors with local cache paths so # the transcript records usable paths for the model. @@ -2506,7 +2618,11 @@ async def _dispatch_inbound_event() -> None: event = MessageEvent( text=_patched_event_text, - message_type=ctx.msg_type, + message_type=( + MessageType.DOCUMENT + if any(mt.startswith(("application/", "text/")) for mt in media_types) + else ctx.msg_type + ), source=ctx.source, message_id=ctx.msg_id or None, raw_message=ctx.push, diff --git a/gateway/run.py b/gateway/run.py index 77ed7260c3bc..7d4f7f3c8be1 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3349,6 +3349,7 @@ async def start(self) -> bool: "BLUEBUBBLES_ALLOWED_USERS", "QQ_ALLOWED_USERS", "YUANBAO_ALLOWED_USERS", + "TLON_ALLOWED_USERS", "GATEWAY_ALLOWED_USERS", ) _builtin_allow_all_vars = ( @@ -3364,6 +3365,7 @@ async def start(self) -> bool: "BLUEBUBBLES_ALLOW_ALL_USERS", "QQ_ALLOW_ALL_USERS", "YUANBAO_ALLOW_ALL_USERS", + "TLON_ALLOW_ALL_USERS", ) # Also pick up plugin-registered platforms โ€” each entry can declare # its own allowed_users_env / allow_all_env, so the warning stays @@ -5377,6 +5379,13 @@ def _create_adapter( return None return YuanbaoAdapter(config) + elif platform == Platform.TLON: + from gateway.platforms.tlon import TlonAdapter, check_tlon_requirements + if not check_tlon_requirements(): + logger.warning("Tlon: aiohttp not installed or TLON_SHIP_URL/NAME/CODE not set") + return None + return TlonAdapter(config) + return None def _is_user_authorized(self, source: SessionSource) -> bool: """ @@ -5419,6 +5428,7 @@ def _is_user_authorized(self, source: SessionSource) -> bool: Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS", Platform.QQBOT: 
"QQ_ALLOWED_USERS", Platform.YUANBAO: "YUANBAO_ALLOWED_USERS", + Platform.TLON: "TLON_ALLOWED_USERS", } platform_group_user_env_map = { Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS", @@ -5445,6 +5455,7 @@ def _is_user_authorized(self, source: SessionSource) -> bool: Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOW_ALL_USERS", Platform.QQBOT: "QQ_ALLOW_ALL_USERS", Platform.YUANBAO: "YUANBAO_ALLOW_ALL_USERS", + Platform.TLON: "TLON_ALLOW_ALL_USERS", } # Bots admitted by {PLATFORM}_ALLOW_BOTS bypass the human allowlist (#4466). platform_allow_bots_map = { @@ -5630,6 +5641,7 @@ def _get_unauthorized_dm_behavior(self, platform: Optional[Platform]) -> str: Platform.WEIXIN: "WEIXIN_ALLOWED_USERS", Platform.BLUEBUBBLES: "BLUEBUBBLES_ALLOWED_USERS", Platform.QQBOT: "QQ_ALLOWED_USERS", + Platform.TLON: "TLON_ALLOWED_USERS", } platform_group_env_map = { Platform.TELEGRAM: ( @@ -6809,6 +6821,12 @@ async def _prepare_inbound_message_text( if _is_shared_multi_user and source.user_name: message_text = f"[{source.user_name}] {message_text}" + # Prepend channel context from history backfill (if any). This + # happens after sender-prefix so the prefix only applies to the + # trigger message, not the backfill block. 
+ if getattr(event, "channel_context", None): + message_text = f"{event.channel_context}\n\n[New message]\n{message_text}" + if event.media_urls: image_paths = [] audio_paths = [] @@ -7985,6 +8003,8 @@ async def _handle_message_with_agent(self, event, source, _quick_key: str, run_g try: if _err_body is not None: _err_json = _err_body.json().get("error", {}) + if not isinstance(_err_json, dict): + _err_json = {} except Exception: pass if _err_json.get("type") == "usage_limit_reached": @@ -12446,6 +12466,7 @@ async def _handle_deny_command(self, event: MessageEvent) -> str: Platform.SIGNAL, Platform.MATTERMOST, Platform.MATRIX, Platform.HOMEASSISTANT, Platform.EMAIL, Platform.SMS, Platform.DINGTALK, Platform.FEISHU, Platform.WECOM, Platform.WECOM_CALLBACK, Platform.WEIXIN, Platform.BLUEBUBBLES, Platform.QQBOT, Platform.LOCAL, + Platform.TLON, }) async def _handle_debug_command(self, event: MessageEvent) -> str: diff --git a/gateway/session.py b/gateway/session.py index ac6f95eec63c..dfa2ca9651de 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -518,6 +518,9 @@ def to_dict(self) -> Dict[str, Any]: else None ), "is_fresh_reset": self.is_fresh_reset, + "was_auto_reset": self.was_auto_reset, + "auto_reset_reason": self.auto_reset_reason, + "reset_had_activity": self.reset_had_activity, } if self.origin: result["origin"] = self.origin.to_dict() @@ -567,6 +570,9 @@ def from_dict(cls, data: Dict[str, Any]) -> "SessionEntry": resume_reason=data.get("resume_reason"), last_resume_marked_at=last_resume_marked_at, is_fresh_reset=data.get("is_fresh_reset", False), + was_auto_reset=data.get("was_auto_reset", False), + auto_reset_reason=data.get("auto_reset_reason"), + reset_had_activity=data.get("reset_had_activity", False), ) diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index c4ec348ef489..036412ac072c 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -470,6 +470,9 @@ def build_welcome_banner(console: Console, model: str, cwd: str, model_short 
= model_short[:25] + "..." ctx_str = f" [dim {dim}]ยท[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else "" left_lines.append(f"[{accent}]{model_short}[/]{ctx_str} [dim {dim}]ยท[/] [dim {dim}]Nous Research[/]") + + if os.getenv("HERMES_YOLO_MODE"): + left_lines.append(f"[bold red]โš  YOLO mode[/] [dim {dim}]โ€” all approval prompts bypassed[/]") left_lines.append(f"[dim {dim}]{cwd}[/]") if session_id: left_lines.append(f"[dim {session_color}]Session: {session_id}[/]") diff --git a/hermes_cli/codex_runtime_plugin_migration.py b/hermes_cli/codex_runtime_plugin_migration.py index dd7faa097943..4b30d3ebf261 100644 --- a/hermes_cli/codex_runtime_plugin_migration.py +++ b/hermes_cli/codex_runtime_plugin_migration.py @@ -304,6 +304,103 @@ def render_codex_toml_section( return "\n".join(out) + "\n" +def _insert_managed_block_at_top_level(user_text: str, managed_block: str) -> str: + """Insert Hermes' managed Codex TOML block while keeping root keys root-scoped. + + TOML has no syntax to return to the document root after a table header. + Therefore appending a root key like `default_permissions = ...` after a + user table such as `[features]` actually creates `features.default_permissions`, + which Codex rejects. Insert the managed block before the first table header + so its root keys remain top-level, while preserving user content verbatim. 
+ """ + if not user_text.strip(): + return managed_block + + lines = user_text.splitlines(keepends=True) + first_table_idx: Optional[int] = None + for idx, line in enumerate(lines): + stripped = line.lstrip() + if stripped.startswith("["): + first_table_idx = idx + break + + if first_table_idx is None: + prefix = user_text.rstrip("\n") + return f"{prefix}\n\n{managed_block}" if prefix else managed_block + + prefix = "".join(lines[:first_table_idx]).rstrip("\n") + suffix = "".join(lines[first_table_idx:]).lstrip("\n") + if prefix: + return f"{prefix}\n\n{managed_block}\n{suffix}" + return f"{managed_block}\n{suffix}" + + +def _strip_unmanaged_plugin_tables(toml_text: str) -> str: + """Remove ``[plugins."@"]`` tables that live OUTSIDE the + managed block. + + Codex itself writes these tables when the user runs ``codex plugins enable`` + directly (i.e. before Hermes' migrate has ever touched the file). When we + later run migrate, ``_query_codex_plugins()`` reports the same plugins via + the live ``plugin/list`` RPC and we re-emit them inside the managed block. + The result without this strip is duplicate ``[plugins."X@Y"]`` table + headers โ€” codex's strict TOML parser then refuses to load the file. + + We own the ``[plugins.*]`` namespace once migrate has run, so dropping any + pre-existing ``[plugins.*]`` tables is safe: ``plugin/list`` is the source + of truth for what's actually installed. The caller is expected to only + invoke this strip when ``plugin/list`` succeeded โ€” otherwise we'd lose + plugins the user installed via ``codex`` without a way to re-emit them. + + Behavior: + * Lines beginning with ``[plugins.`` start a swallow region that ends at + the next non-``[plugins.`` table header or end-of-file. + * Content inside the managed block is untouched (callers should run + ``_strip_existing_managed_block`` first so the managed block has + already been removed when this runs). 
+ """ + lines = toml_text.splitlines(keepends=True) + out: list[str] = [] + in_plugin_table = False + for line in lines: + stripped = line.lstrip() + # Only treat a line as a table header when it has the shape + # ``[...]`` (optionally followed by a comment). Multi-line array + # continuations like ``["nested"],`` also start with ``[`` after + # lstrip but are not headers โ€” without this guard they would + # falsely flip ``in_plugin_table`` to False mid-table and leak + # array fragments into the output. + if _looks_like_table_header(stripped): + in_plugin_table = stripped.startswith("[plugins.") + if in_plugin_table: + continue + if in_plugin_table: + # Swallow keys/comments/blanks until the next table header. + continue + out.append(line) + return "".join(out) + + +def _looks_like_table_header(stripped_line: str) -> bool: + """Return True if ``stripped_line`` is a TOML table header. + + A header has the shape ``[name]`` or ``[[name]]`` (array-of-tables), + optionally followed by a comment. The closing ``]`` (or ``]]``) must + appear on the same line, and no key-assignment ``=`` can precede it. + This distinguishes real headers from multi-line array continuation + lines that also start with ``[`` after ``lstrip()``. + """ + if not stripped_line.startswith("["): + return False + # Drop trailing comment so e.g. ``[features] # note`` still matches. + head = stripped_line.split("#", 1)[0].rstrip() + if not head.endswith("]"): + return False + # ``key = [x]`` would have an ``=`` before the bracket; a header doesn't. + bracket_idx = head.index("]") + return "=" not in head[: bracket_idx + 1] + + def _strip_existing_managed_block(toml_text: str) -> str: """Remove any prior managed section so re-runs idempotently replace it. @@ -431,6 +528,32 @@ def _query_codex_plugins( return out, None +def _looks_like_test_tempdir(path: str) -> bool: + """Heuristic: does ``path`` look like a pytest/transient tempdir? 
+ + pytest tempdirs live under ``pytest-of-/pytest-/`` (created via + ``tmp_path`` / ``tmp_path_factory``) and are reaped between sessions. + macOS routes ``/tmp`` through ``/private/var/folders/<โ€ฆ>/T`` which is + what pytest's tempdir factory uses by default. If a HERMES_HOME pointing + at one of those paths is burned into ``~/.codex/config.toml``, every + codex-routed hermes-tools call fails silently once the directory is GC'd. + + We err on the side of refusing โ€” losing a (very unlikely) real + ``~/.hermes`` symlink that happens to live under ``/private/var/folders`` + is much less harmful than silently bricking codex's tool surface. + """ + if not path: + return False + needles = ( + "pytest-of-", + "/pytest-", + "/tmp/pytest", + "/private/var/folders/", # macOS tempdir root + ) + normalized = path.lower() + return any(needle in normalized for needle in needles) + + def _build_hermes_tools_mcp_entry() -> dict: """Build the codex stdio-transport entry that launches Hermes' own tool surface as an MCP server. Codex's subprocess will call back into @@ -443,9 +566,22 @@ def _build_hermes_tools_mcp_entry() -> dict: import sys env: dict[str, str] = {} - # HERMES_HOME passes through if set so the MCP subprocess sees the - # same config / auth / sessions DB as the parent CLI. - hermes_home = os.environ.get("HERMES_HOME") + # HERMES_HOME passes through IF SET so the MCP subprocess sees the same + # config / auth / sessions DB as the parent CLI. Read from os.environ + # (not get_hermes_home()) on purpose: when the env var is unset we want + # codex's subprocess to inherit whatever HERMES_HOME its launcher sets + # at runtime (systemd unit, gateway, kanban dispatcher, custom shell), + # rather than burning the migrate-time resolved default into config.toml + # โ€” that would override the launcher's HERMES_HOME and pin the subprocess + # to the wrong profile. 
+ # + # The pytest-tempdir guard below catches the issue #26250 Bug C scenario: + # a sibling test's monkeypatch.setenv("HERMES_HOME", tmp_path) would + # otherwise leak a transient pytest tempdir into the user's real + # ~/.codex/config.toml and silently brick codex once the tempdir is GC'd. + hermes_home = os.environ.get("HERMES_HOME") or "" + if hermes_home and _looks_like_test_tempdir(hermes_home): + hermes_home = "" if hermes_home: env["HERMES_HOME"] = hermes_home # PYTHONPATH passes through so a worktree-launched hermes finds the @@ -533,10 +669,16 @@ def migrate( # Discover installed Codex curated plugins. Best-effort โ€” never blocks # the migration if codex is unreachable or the RPC fails. plugins: list[dict] = [] + plugin_query_succeeded = False if discover_plugins and not dry_run: plugins, plugin_err = _query_codex_plugins(codex_home=codex_home) if plugin_err: report.plugin_query_error = plugin_err + else: + # plugin/list returned authoritatively (even if the list is empty). + # That means we own [plugins.*] for this re-render and can safely + # strip any pre-existing tables outside the managed block. 
+ plugin_query_succeeded = True for p in plugins: report.migrated_plugins.append(f"{p['name']}@{p['marketplace']}") @@ -571,14 +713,15 @@ def migrate( report.errors.append(f"could not read {target}: {exc}") return report without_managed = _strip_existing_managed_block(existing) - # Ensure exactly one blank line between user content and managed block - if without_managed and not without_managed.endswith("\n"): - without_managed += "\n" - new_text = ( - without_managed.rstrip("\n") + "\n\n" + managed_block - if without_managed.strip() - else managed_block - ) + # Bug B: when plugin/list ran authoritatively, codex's own + # [plugins."@"] tables outside our managed block + # would survive _strip_existing_managed_block and then collide with + # the entries we re-emit inside the managed block โ€” producing + # duplicate-table-header parse errors on codex's next startup. Drop + # those pre-existing tables since plugin/list is the source of truth. + if plugin_query_succeeded: + without_managed = _strip_unmanaged_plugin_tables(without_managed) + new_text = _insert_managed_block_at_top_level(without_managed, managed_block) else: new_text = managed_block diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 685de3d7341f..a560e1e6a1ed 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1251,6 +1251,8 @@ def _ensure_hermes_home_managed(home: Path): "allowed_channels": "", # If set, bot ONLY responds in these channel IDs (whitelist) "auto_thread": True, # Auto-create threads on @mention in channels (like Slack) "thread_require_mention": False, # If True, require @mention in threads too (multi-bot threads) + "history_backfill": True, # If True, prepend recent channel scrollback when bot is triggered (recovers messages missed while require_mention gated them out) + "history_backfill_limit": 50, # Max number of recent messages to scan when assembling the backfill block "reactions": True, # Add ๐Ÿ‘€/โœ…/โŒ reactions to messages during processing 
"channel_prompts": {}, # Per-channel ephemeral system prompts (forum parents apply to child threads) # Opt-in DM role-based auth (#12136). By default, DISCORD_ALLOWED_ROLES @@ -2136,22 +2138,6 @@ def _ensure_hermes_home_managed(home: Path): "password": True, "category": "tool", }, - "TINKER_API_KEY": { - "description": "Tinker API key for RL training", - "prompt": "Tinker API key", - "url": "https://tinker-console.thinkingmachines.ai/keys", - "tools": ["rl_start_training", "rl_check_status", "rl_stop_training"], - "password": True, - "category": "tool", - }, - "WANDB_API_KEY": { - "description": "Weights & Biases API key for experiment tracking", - "prompt": "WandB API key", - "url": "https://wandb.ai/authorize", - "tools": ["rl_get_results", "rl_check_status"], - "password": True, - "category": "tool", - }, "VOICE_TOOLS_OPENAI_KEY": { "description": "OpenAI API key for voice transcription (Whisper) and OpenAI TTS", "prompt": "OpenAI API Key (for Whisper STT + TTS)", @@ -4988,8 +4974,7 @@ def set_config_value(key: str, value: str): 'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN', 'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY', 'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN', - 'GITHUB_TOKEN', 'HONCHO_API_KEY', 'WANDB_API_KEY', - 'TINKER_API_KEY', + 'GITHUB_TOKEN', 'HONCHO_API_KEY', ] if key.upper() in api_keys or key.upper().endswith(('_API_KEY', '_TOKEN')) or key.upper().startswith('TERMINAL_SSH'): diff --git a/hermes_cli/cron.py b/hermes_cli/cron.py index adf4f0c09270..7bff9c6b87b5 100644 --- a/hermes_cli/cron.py +++ b/hermes_cli/cron.py @@ -196,9 +196,15 @@ def cron_create(args): def cron_edit(args): - from cron.jobs import get_job - - job = get_job(args.job_id) + from cron.jobs import AmbiguousJobReference, resolve_job_ref + + try: + job = resolve_job_ref(args.job_id) + except AmbiguousJobReference as exc: + print(color(str(exc), Colors.RED)) + for m in exc.matches: + print(f" {m['id']} (name: {m.get('name')!r})") + return 1 if not job: 
print(color(f"Job not found: {args.job_id}", Colors.RED)) return 1 diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index a551d4d204e6..c2035b03e6e6 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -1595,28 +1595,6 @@ def _probe_bedrock() -> _ConnectivityResult: for _issue in _r.issues: issues.append(_issue) - # ========================================================================= - # Check: Submodules - # ========================================================================= - print() - print(color("โ—† Submodules", Colors.CYAN, Colors.BOLD)) - - # tinker-atropos (RL training backend) - tinker_dir = PROJECT_ROOT / "tinker-atropos" - if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists(): - if py_version >= (3, 11): - try: - __import__("tinker_atropos") - check_ok("tinker-atropos", "(RL training backend)") - except ImportError: - install_cmd = f"{_python_install_cmd()} -e ./tinker-atropos" - check_warn("tinker-atropos found but not installed", f"(run: {install_cmd})") - issues.append(f"Install tinker-atropos: {install_cmd}") - else: - check_warn("tinker-atropos requires Python 3.11+", f"(current: {py_version.major}.{py_version.minor})") - else: - check_warn("tinker-atropos not found", "(run: git submodule update --init --recursive)") - # ========================================================================= # Check: Tool Availability # ========================================================================= diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index b0cb579daa8f..d6aae1199ae3 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -3648,6 +3648,33 @@ def _atexit_hook() -> None: "help": "The App Secret (used for HMAC signing) from your Yuanbao IM Bot."}, ], }, + { + "key": "tlon", + "label": "Tlon", + "emoji": "โ—†", + "token_var": "TLON_SHIP_URL", + "setup_instructions": [ + "1. Create or choose a Tlon ship for Hermes.", + "2. Copy the ship URL, ship name, and +code.", + "3. 
Set TLON_AUTO_DISCOVER=true to monitor joined group channels.", + "4. Add owner/allowed ships so DMs can reach Hermes.", + ], + "vars": [ + {"name": "TLON_SHIP_URL", "prompt": "Ship URL (e.g. https://sampel-palnet.tlon.network)", "password": False, + "help": "Base URL for the Tlon ship."}, + {"name": "TLON_SHIP_NAME", "prompt": "Ship name (e.g. ~sampel-palnet)", "password": False, + "help": "The patp of the Hermes ship."}, + {"name": "TLON_SHIP_CODE", "prompt": "Ship +code", "password": True, + "help": "Login code for the Hermes ship."}, + {"name": "TLON_OWNER_SHIP", "prompt": "Owner ship (e.g. ~zod)", "password": False, + "help": "Ship that receives approval requests and can run owner commands."}, + {"name": "TLON_ALLOWED_USERS", "prompt": "Allowed ships (comma-separated, or empty)", "password": False, + "is_allowlist": True, + "help": "Ships allowed to interact with Hermes."}, + {"name": "TLON_HOME_CHANNEL", "prompt": "Home channel/DM for cron delivery (or empty)", "password": False, + "help": "A channel nest like chat/~host/channel, or a ship like ~zod."}, + ], + }, ] def _all_platforms() -> list[dict]: """Return the full list of platforms for setup menus. diff --git a/hermes_cli/goals.py b/hermes_cli/goals.py index 1542b9a7a382..62ee00547c16 100644 --- a/hermes_cli/goals.py +++ b/hermes_cli/goals.py @@ -45,6 +45,16 @@ DEFAULT_MAX_TURNS = 20 DEFAULT_JUDGE_TIMEOUT = 30.0 +# Judge output budget. The freeform judge returns a one-line JSON verdict, but +# reasoning models (deepseek-v4, qwq, etc.) burn tokens on hidden reasoning +# before emitting the visible JSON โ€” and the first /goal turn's prompt is +# larger than later turns, which pushes total reply length past tight caps. +# 200 tokens (the original default) reliably truncated the JSON on reasoning +# models, leaving '{"done": true, "reason": "The agent successfully' and +# triggering the auto-pause. 
4096 covers reasoning + verdict on every model +# we've live-tested; override via auxiliary.goal_judge.max_tokens for +# specifically constrained setups. +DEFAULT_JUDGE_MAX_TOKENS = 4096 # Cap how much of the last response + recent messages we send to the judge. _JUDGE_RESPONSE_SNIPPET_CHARS = 4000 # After this many consecutive judge *parse* failures (empty output / non-JSON), @@ -282,6 +292,30 @@ def _truncate(text: str, limit: int) -> str: _JSON_OBJECT_RE = re.compile(r"\{.*?\}", re.DOTALL) +def _goal_judge_max_tokens() -> int: + """Resolve auxiliary.goal_judge.max_tokens, falling back to the default. + + ``load_config()`` is cached on the config file's (mtime, size), so calling + this once per judge turn is cheap. A non-positive or non-int value falls + back to the default rather than crashing the goal loop. + """ + try: + from hermes_cli.config import load_config + + cfg = load_config() + value = ( + (cfg.get("auxiliary") or {}) + .get("goal_judge", {}) + .get("max_tokens", DEFAULT_JUDGE_MAX_TOKENS) + ) + value = int(value) + if value > 0: + return value + except Exception: + pass + return DEFAULT_JUDGE_MAX_TOKENS + + def _parse_judge_response(raw: str) -> Tuple[bool, str, bool]: """Parse the judge's reply. Fail-open to ``(False, "", parse_failed)``. @@ -404,7 +438,7 @@ def judge_goal( {"role": "user", "content": prompt}, ], temperature=0, - max_tokens=200, + max_tokens=_goal_judge_max_tokens(), timeout=timeout, extra_body=get_auxiliary_extra_body() or None, ) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 214a1855b30a..833172a23b9c 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5681,21 +5681,50 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: if not _web_ui_build_needed(web_dir): return True + # Console-encoding-safe print: Windows consoles default to cp1252 + # (or similar) and will raise UnicodeEncodeError on arrow / check + # glyphs unless PYTHONIOENCODING=utf-8 is set. 
Routing every print + # in this function through _say() with errors="replace" keeps the + # build path usable on a stock `py -m hermes_cli.main web` invocation. + def _say(text: str) -> None: + try: + print(text) + except UnicodeEncodeError: + encoding = getattr(sys.stdout, "encoding", None) or "ascii" + print(text.encode(encoding, errors="replace").decode(encoding, errors="replace")) + npm = shutil.which("npm") if not npm: if fatal: - print("Web UI frontend not built and npm is not available.") - print("Install Node.js, then run: cd web && npm install && npm run build") + _say("Web UI frontend not built and npm is not available.") + _say("Install Node.js, then run: cd web && npm install && npm run build") return not fatal - print("โ†’ Building web UI...") + _say("โ†’ Building web UI...") + + def _relay(result: "subprocess.CompletedProcess") -> None: + """Print captured npm output so users can see *why* a step failed. + + Windows users hitting `rm -rf` / `cp -r` errors (or any other + sync-assets / Vite failure) would otherwise see only ``Web UI + build failed`` with no hint of the underlying cause, because + the npm calls run with ``capture_output=True``. 
+ """ + for blob in (result.stdout, result.stderr): + if not blob: + continue + text = blob.decode("utf-8", errors="replace").rstrip() if isinstance(blob, bytes) else blob.rstrip() + if text: + _say(text) + r1 = _run_npm_install_deterministic(npm, web_dir, extra_args=("--silent",)) if r1.returncode != 0: - print( + _say( f" {'โœ—' if fatal else 'โš '} Web UI npm install failed" + ("" if fatal else " (hermes web will not be available)") ) + _relay(r1) if fatal: - print(" Run manually: cd web && npm install && npm run build") + _say(" Run manually: cd web && npm install && npm run build") return False # First attempt r2 = subprocess.run( @@ -5730,21 +5759,20 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: # A stale UI is far better than no UI for non-interactive callers # (Windows Scheduled Tasks, CI) โ€” issue #23817. if dist_index.exists(): - print(" โš  Web UI build failed โ€” serving stale dist as fallback") + _say(" โš  Web UI build failed โ€” serving stale dist as fallback") if stderr_tail: - print(f" Build error:\n {stderr_tail}") + _say(f" Build error:\n {stderr_tail}") return True - print( + _say( f" {'โœ—' if fatal else 'โš '} Web UI build failed" + ("" if fatal else " (hermes web will not be available)") ) - if stderr_tail: - print(f" Build error:\n {stderr_tail}") + _relay(r2) if fatal: - print(" Run manually: cd web && npm install && npm run build") + _say(" Run manually: cd web && npm install && npm run build") return False - print(" โœ“ Web UI built") + _say(" โœ“ Web UI built") return True @@ -11671,16 +11699,57 @@ def cmd_claw(args): description="Start Hermes Agent in ACP mode for editor integration (VS Code, Zed, JetBrains)", ) _add_accept_hooks_flag(acp_parser) + acp_parser.add_argument( + "--version", + action="store_true", + dest="acp_version", + help="Print Hermes ACP version and exit", + ) + acp_parser.add_argument( + "--check", + action="store_true", + help="Verify ACP dependencies and adapter imports, then exit", + ) + 
acp_parser.add_argument( + "--setup", + action="store_true", + help="Run interactive Hermes provider/model setup for ACP terminal auth", + ) + acp_parser.add_argument( + "--setup-browser", + action="store_true", + help="Install agent-browser + Playwright Chromium into ~/.hermes/node/ " + "for browser tool support (idempotent).", + ) + acp_parser.add_argument( + "--yes", + "-y", + action="store_true", + dest="assume_yes", + help="Accept all prompts (used by --setup-browser to skip the " + "~400 MB Chromium download confirmation).", + ) def cmd_acp(args): """Launch Hermes Agent as an ACP server.""" try: from acp_adapter.entry import main as acp_main - acp_main() + acp_argv = [] + if getattr(args, "acp_version", False): + acp_argv.append("--version") + if getattr(args, "check", False): + acp_argv.append("--check") + if getattr(args, "setup", False): + acp_argv.append("--setup") + if getattr(args, "setup_browser", False): + acp_argv.append("--setup-browser") + if getattr(args, "assume_yes", False): + acp_argv.append("--yes") + acp_main(acp_argv) except ImportError: - print("ACP dependencies not installed.") - print("Install them with: pip install -e '.[acp]'") + print("ACP dependencies not installed.", file=sys.stderr) + print("Install them with: pip install -e '.[acp]'", file=sys.stderr) sys.exit(1) acp_parser.set_defaults(func=cmd_acp) diff --git a/hermes_cli/mcp_config.py b/hermes_cli/mcp_config.py index 8c12ad707581..ed9d7b5f6dbc 100644 --- a/hermes_cli/mcp_config.py +++ b/hermes_cli/mcp_config.py @@ -25,6 +25,7 @@ ) from hermes_cli.colors import Colors, color from hermes_constants import display_hermes_home +from tools.mcp_tool import _ENV_VAR_PATTERN logger = logging.getLogger(__name__) @@ -551,7 +552,7 @@ def cmd_mcp_test(args): for k, v in headers.items(): if isinstance(v, str) and ("key" in k.lower() or "auth" in k.lower()): # Mask the value - resolved = _interpolate_value(v) + resolved = _ENV_VAR_PATTERN.sub(lambda m: os.getenv(m.group(1), ""), v) if 
len(resolved) > 8: masked = resolved[:4] + "***" + resolved[-4:] else: @@ -581,13 +582,6 @@ def cmd_mcp_test(args): print() -def _interpolate_value(value: str) -> str: - """Resolve ``${ENV_VAR}`` references in a string.""" - def _replace(m): - return os.getenv(m.group(1), "") - return re.sub(r"\$\{(\w+)\}", _replace, value) - - # โ”€โ”€โ”€ hermes mcp login โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ def cmd_mcp_login(args): diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 1ffede636a12..bc41132f5d51 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -3702,13 +3702,12 @@ def validate_requested_model( # Static-catalog fallback: when the /models probe was unreachable, # validate against the curated list from provider_model_ids() โ€” same - # pattern as the openai-codex and minimax branches above. This fixes - # /model switches in the gateway for providers like opencode-go and - # opencode-zen whose /models endpoint returns 404 against the HTML - # marketing site. Without this block, validate_requested_model would - # reject every model on such providers, switch_model() would return - # success=False, and the gateway would never write to - # _session_model_overrides. + # pattern as the openai-codex and minimax branches above. This keeps + # /model switches working in the gateway for providers whose /models + # endpoint is temporarily unreachable or returns a non-JSON payload. + # Without this block, validate_requested_model would reject every model + # on such providers, switch_model() would return success=False, and + # the gateway would never write to _session_model_overrides. 
provider_label = _PROVIDER_LABELS.get(normalized, normalized) try: catalog_models = provider_model_ids(normalized) diff --git a/hermes_cli/proxy/server.py b/hermes_cli/proxy/server.py index 223bc3bd62db..48de784afe4f 100644 --- a/hermes_cli/proxy/server.py +++ b/hermes_cli/proxy/server.py @@ -243,7 +243,7 @@ async def run_server( loop = asyncio.get_running_loop() for sig in (signal.SIGINT, signal.SIGTERM): try: - loop.add_signal_handler(sig, stop_event.set) + loop.add_signal_handler(sig, stop_event.set) # windows-footgun: ok except NotImplementedError: # Windows / restricted environments โ€” Ctrl+C will still # raise KeyboardInterrupt and unwind us. diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 4ac21ea45687..d7c30fe5648c 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -102,8 +102,10 @@ def _auto_detect_local_model(base_url: str) -> str: model_id = models[0].get("id", "") if model_id: return model_id - except Exception: - pass + except Exception as exc: + # Log instead of silently swallowing โ€” aids debugging when + # local model auto-detection fails unexpectedly. 
+ logger.debug("Auto-detect model from %s failed: %s", base_url, exc) return "" diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 6a8bf950589c..5d635b2c464c 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -522,14 +522,6 @@ def _print_setup_summary(config: dict, hermes_home): elif managed_nous_tools_enabled() and subscription_features.nous_auth_present: tool_status.append(("Modal Execution (optional via Nous subscription)", True, None)) - # Tinker + WandB (RL training) - if get_env_value("TINKER_API_KEY") and get_env_value("WANDB_API_KEY"): - tool_status.append(("RL Training (Tinker)", True, None)) - elif get_env_value("TINKER_API_KEY"): - tool_status.append(("RL Training (Tinker)", False, "WANDB_API_KEY")) - else: - tool_status.append(("RL Training (Tinker)", False, "TINKER_API_KEY")) - # Home Assistant if get_env_value("HASS_TOKEN"): tool_status.append(("Smart Home (Home Assistant)", True, None)) diff --git a/hermes_cli/skin_engine.py b/hermes_cli/skin_engine.py index f4d894c1e7ab..0946eae91682 100644 --- a/hermes_cli/skin_engine.py +++ b/hermes_cli/skin_engine.py @@ -849,10 +849,14 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]: except Exception: return {} - prompt = skin.get_color("prompt", "#FFF8DC") + # Input/prompt: leave unset by default so the typed text inherits + # the terminal's foreground color (readable in both light and dark + # color schemes). Skins can opt into a colored prompt by setting + # `prompt` explicitly in their YAML. 
+ prompt = skin.get_color("prompt", "") input_rule = skin.get_color("input_rule", "#CD7F32") title = skin.get_color("banner_title", "#FFD700") - text = skin.get_color("banner_text", prompt) + text = skin.get_color("banner_text", "#FFF8DC") dim = skin.get_color("banner_dim", "#555555") label = skin.get_color("ui_label", title) warn = skin.get_color("ui_warn", "#FF8C00") @@ -872,7 +876,11 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]: menu_meta_current_bg = skin.get_color("completion_menu_meta_current_bg", menu_current_bg) return { - "input-area": prompt, + # Typed input always uses terminal default fg/bg so it's + # readable in both light and dark Terminal.app modes. The + # skin's `prompt` color (if any) only styles the prompt symbol, + # NOT the user's typed text. + "input-area": "", "placeholder": f"{dim} italic", "prompt": prompt, "prompt-working": f"{dim} italic", diff --git a/hermes_cli/status.py b/hermes_cli/status.py index b4417091ca7b..00ac21ceb8a5 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -141,8 +141,6 @@ def show_status(args): "Browser Use": "BROWSER_USE_API_KEY", # Optional โ€” local browser works without this "Browserbase": "BROWSERBASE_API_KEY", # Optional โ€” direct credentials only "FAL": "FAL_KEY", - "Tinker": "TINKER_API_KEY", - "WandB": "WANDB_API_KEY", "ElevenLabs": "ELEVENLABS_API_KEY", "GitHub": "GITHUB_TOKEN", } @@ -404,6 +402,7 @@ def _resolve_env(env_ref) -> str: "BlueBubbles": ("BLUEBUBBLES_SERVER_URL", "BLUEBUBBLES_HOME_CHANNEL"), "QQBot": ("QQ_APP_ID", "QQ_HOME_CHANNEL"), "Yuanbao": ("YUANBAO_APP_ID", "YUANBAO_HOME_CHANNEL"), + "Tlon": ("TLON_SHIP_URL", "TLON_HOME_CHANNEL"), } for name, (token_var, home_var) in platforms.items(): diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 874740405301..fc5b1acf5cfa 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -71,7 +71,6 @@ ("delegation", "๐Ÿ‘ฅ Task Delegation", "delegate_task"), ("cronjob", "โฐ Cron 
Jobs", "create/list/update/pause/resume/run, with optional attached skills"), ("messaging", "๐Ÿ“จ Cross-Platform Messaging", "send_message"), - ("rl", "๐Ÿงช RL Training", "Tinker-Atropos training tools"), ("homeassistant", "๐Ÿ  Home Assistant", "smart home device control"), ("spotify", "๐ŸŽต Spotify", "playback, search, playlists, library"), ("discord", "๐Ÿ’ฌ Discord (read/participate)", "fetch messages, search members, create thread"), @@ -87,7 +86,7 @@ # Video gen is off by default โ€” it's a niche, paid, slow feature. Users # who want it opt in via `hermes tools` โ†’ Video Generation, which walks # them through provider + model selection. -_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin", "video", "video_gen"} +_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "spotify", "discord", "discord_admin", "video", "video_gen"} # Platform-scoped toolsets: only appear in the `hermes tools` checklist for # these platforms, and only resolve/save for these platforms. 
A toolset @@ -424,22 +423,6 @@ def _get_plugin_toolset_keys() -> set: }, ], }, - "rl": { - "name": "RL Training", - "icon": "๐Ÿงช", - "requires_python": (3, 11), - "providers": [ - { - "name": "Tinker / Atropos", - "tag": "RL training platform", - "env_vars": [ - {"key": "TINKER_API_KEY", "prompt": "Tinker API key", "url": "https://tinker-console.thinkingmachines.ai/keys"}, - {"key": "WANDB_API_KEY", "prompt": "WandB API key", "url": "https://wandb.ai/authorize"}, - ], - "post_setup": "rl_training", - }, - ], - }, "langfuse": { "name": "Langfuse Observability", "icon": "๐Ÿ“Š", @@ -912,24 +895,6 @@ def _run_post_setup(post_setup_key: str): _print_warning(f" Spotify login failed: {exc}") _print_info(" Run manually: hermes auth spotify") - elif post_setup_key == "rl_training": - try: - __import__("tinker_atropos") - except ImportError: - tinker_dir = PROJECT_ROOT / "tinker-atropos" - if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists(): - _print_info(" Installing tinker-atropos submodule...") - result = _pip_install(["-e", str(tinker_dir)]) - if result.returncode == 0: - _print_success(" tinker-atropos installed") - else: - _print_warning(" tinker-atropos install failed - run manually:") - _print_info(' uv pip install -e "./tinker-atropos"') - else: - _print_warning(" tinker-atropos submodule not found - run:") - _print_info(" git submodule update --init --recursive") - _print_info(' uv pip install -e "./tinker-atropos"') - elif post_setup_key == "langfuse": # Install the langfuse SDK. try: diff --git a/model_tools.py b/model_tools.py index 0b9178111a50..db19bb67e539 100644 --- a/model_tools.py +++ b/model_tools.py @@ -97,9 +97,7 @@ def _run_async(coro): asyncio.run()'s create-and-destroy lifecycle. This is the single source of truth for sync->async bridging in tool - handlers. The RL paths (agent_loop.py, tool_context.py) also provide - outer thread-pool wrapping as defense-in-depth, but each handler is - self-protecting via this function. + handlers. 
Each handler is self-protecting via this function. """ try: loop = asyncio.get_running_loop() @@ -231,13 +229,6 @@ def _run_in_worker(): "browser_vision", "browser_console" ], "cronjob_tools": ["cronjob"], - "rl_tools": [ - "rl_list_environments", "rl_select_environment", - "rl_get_current_config", "rl_edit_config", - "rl_start_training", "rl_check_status", - "rl_stop_training", "rl_get_results", - "rl_list_runs", "rl_test_inference" - ], "file_tools": ["read_file", "write_file", "patch", "search_files"], "tts_tools": ["text_to_speech"], } diff --git a/nix/hermes-agent.nix b/nix/hermes-agent.nix index ce8be16cfddf..6c391878cc54 100644 --- a/nix/hermes-agent.nix +++ b/nix/hermes-agent.nix @@ -192,7 +192,6 @@ stdenv.mkDerivation { source .venv/bin/activate uv pip install -e ".[all]" [ -d mini-swe-agent ] && uv pip install -e ./mini-swe-agent 2>/dev/null || true - [ -d tinker-atropos ] && uv pip install -e ./tinker-atropos 2>/dev/null || true mkdir -p .nix-stamps echo "$STAMP_VALUE" > "$STAMP" else diff --git a/optional-skills/mlops/hermes-atropos-environments/SKILL.md b/optional-skills/mlops/hermes-atropos-environments/SKILL.md deleted file mode 100644 index 6766c381014f..000000000000 --- a/optional-skills/mlops/hermes-atropos-environments/SKILL.md +++ /dev/null @@ -1,303 +0,0 @@ ---- -name: hermes-atropos-environments -description: Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/evaluate). Use when creating, reviewing, or fixing RL environments in the hermes-agent repo. 
-version: 1.1.0 -author: Hermes Agent -license: MIT -platforms: [linux, macos, windows] -metadata: - hermes: - tags: [atropos, rl, environments, training, reinforcement-learning, reward-functions] - related_skills: [axolotl, fine-tuning-with-trl, lm-evaluation-harness] ---- - -# Hermes Agent Atropos Environments - -Guide for building RL environments in the hermes-agent repo that integrate with the Atropos training framework. - -## Architecture Overview - -``` -Atropos BaseEnv (atroposlib/envs/base.py) - โ””โ”€โ”€ HermesAgentBaseEnv (environments/hermes_base_env.py) - โ”œโ”€โ”€ Handles agent loop orchestration - โ”œโ”€โ”€ Handles tool resolution per group - โ”œโ”€โ”€ Handles ToolContext for reward verification - โ””โ”€โ”€ YOUR ENVIRONMENT (environments/your_env.py) - Only implements: setup, get_next_item, format_prompt, - compute_reward, evaluate, wandb_log -``` - -Hermes environments are special because they run a **multi-turn agent loop with tool calling** โ€” not just single-turn completions. The base env handles the loop; you implement the task and scoring. - -## File Locations - -| File | Purpose | -|------|---------| -| `environments/hermes_base_env.py` | Base class with agent loop + tool resolution | -| `environments/agent_loop.py` | `HermesAgentLoop` + `AgentResult` dataclass | -| `environments/tool_context.py` | `ToolContext` for reward verification | -| `environments/tool_call_parsers.py` | Phase 2 tool call parsers (hermes, mistral, etc.) | -| `environments/your_env.py` | Your environment implementation | - -## Inference Setup โ€” Ask the User First - -**IMPORTANT:** Before running any test, evaluation, or data generation command, always ask the user how they want to handle inference. Do NOT assume OpenRouter or any specific endpoint. Present these options: - -1. **OpenRouter** โ€” Ask which model they want to use (e.g., `anthropic/claude-sonnet-4.5`, `google/gemini-2.5-pro`, `meta-llama/llama-3.3-70b-instruct`, etc.). 
Requires `OPENROUTER_API_KEY` in environment. -2. **Self-hosted VLLM endpoint** โ€” Ask for their base URL (e.g., `http://localhost:8000/v1`) and model name. Set `--openai.server_type vllm`. -3. **Other OpenAI-compatible API** โ€” Ask for the base URL, model name, and any required API key. Set `--openai.server_type openai` and `--openai.health_check false`. -4. **Local Atropos training server** โ€” For `serve` mode with a live training loop. Default `http://localhost:8000/v1`. - -Once the user tells you their setup, use those values in all CLI commands for that session. Example prompts: - -> "Before I run this, how would you like to handle inference? -> 1. OpenRouter (I'll need your preferred model, e.g. claude-sonnet-4.5) -> 2. A self-hosted VLLM endpoint (give me the URL and model name) -> 3. Another OpenAI-compatible API (give me the URL, model, and any auth details) -> 4. Local Atropos training server (serve mode)" - -### Key flags by provider: - -| Provider | `--openai.server_type` | `--openai.health_check` | `--openai.api_key` | -|----------|----------------------|------------------------|-------------------| -| OpenRouter | `openai` | `false` | `$OPENROUTER_API_KEY` | -| VLLM (self-hosted) | `vllm` | (default) | (not needed) | -| Other OpenAI-compatible | `openai` | `false` | As needed | -| Local Atropos | (default) | (default) | (not needed) | - -## Required Methods - -### 1. `setup()` โ€” Load dataset and initialize state - -```python -async def setup(self) -> None: - """Called once at startup. Load datasets, initialize state.""" - # Try HuggingFace first, fallback to built-in samples - try: - from datasets import load_dataset - ds = load_dataset("your/dataset", split="test") - self._items = [...] 
- except Exception: - self._items = BUILTIN_SAMPLES - - # Always split into train/eval - random.shuffle(self._items) - eval_size = max(20, int(len(self._items) * 0.1)) - self._eval_items = self._items[:eval_size] - self._items = self._items[eval_size:] -``` - -### 2. `get_next_item()` โ€” Return next training item - -```python -async def get_next_item(self) -> dict: - """Return next item, cycling through dataset.""" - item = self._items[self._index % len(self._items)] - self._index += 1 - return item -``` - -### 3. `format_prompt(item)` โ€” Convert item to user message - -```python -def format_prompt(self, item: dict) -> str: - """Convert a dataset item into the user-facing prompt.""" - return f"Research this question: {item['question']}" -``` - -### 4. `compute_reward(item, result, ctx)` โ€” Score the rollout - -**CRITICAL**: `result` is an `AgentResult`, NOT a dict. It has these attributes: -- `result.messages` โ€” List of message dicts (OpenAI format) -- `result.turns_used` โ€” Number of LLM calls made -- `result.finished_naturally` โ€” True if model stopped voluntarily -- `result.tool_errors` โ€” List of ToolError objects - -**AgentResult does NOT have**: `final_response`, `tool_calls`, `tools_used`. 
-You must extract these from `result.messages`: - -```python -async def compute_reward(self, item, result: AgentResult, ctx: ToolContext) -> float: - # Extract final response (last assistant message with content) - final_response = "" - tools_used = [] - for msg in reversed(result.messages): - if msg.get("role") == "assistant" and msg.get("content") and not final_response: - final_response = msg["content"] - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - fn = tc.get("function", {}) if isinstance(tc, dict) else {} - name = fn.get("name", "") - if name: - tools_used.append(name) - - # Score using LLM judge, heuristic, or ToolContext verification - correctness = await self._llm_judge(item, final_response) - return correctness -``` - -`ctx` (ToolContext) gives you terminal/file access to the agent's sandbox for verification: -```python -# Run tests in the agent's sandbox -result = ctx.terminal("pytest /workspace/test.py") -return 1.0 if result["exit_code"] == 0 else 0.0 -``` - -### 5. `evaluate()` โ€” Periodic evaluation with full agent loop - -**MUST use the full agent loop with tools**, not single-turn chat_completion. 
-The whole point of hermes-agent environments is agentic evaluation: - -```python -async def evaluate(self, *args, **kwargs) -> None: - import time, uuid - from environments.agent_loop import HermesAgentLoop - from environments.tool_context import ToolContext - - start_time = time.time() - tools, valid_names = self._resolve_tools_for_group() - samples = [] - - for item in self._eval_items[:self.config.eval_size]: - task_id = str(uuid.uuid4()) - messages = [] - if self.config.system_prompt: - messages.append({"role": "system", "content": self.config.system_prompt}) - messages.append({"role": "user", "content": self.format_prompt(item)}) - - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=0.0, # Deterministic for eval - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - ) - result = await agent.run(messages) - - ctx = ToolContext(task_id) - try: - reward = await self.compute_reward(item, result, ctx) - finally: - ctx.cleanup() - - samples.append({"prompt": ..., "response": ..., "reward": reward}) - - eval_metrics = {"eval/mean_reward": ...} - await self.evaluate_log(metrics=eval_metrics, samples=samples, - start_time=start_time, end_time=time.time()) -``` - -### 6. `wandb_log()` โ€” Custom metrics logging - -Always call `super().wandb_log()` at the end: - -```python -async def wandb_log(self, wandb_metrics=None): - if wandb_metrics is None: - wandb_metrics = {} - if self._reward_buffer: - n = len(self._reward_buffer) - wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n - self._reward_buffer.clear() - await super().wandb_log(wandb_metrics) # MUST call super -``` - -**Pitfall**: `compute_reward` appends to metric buffers. During eval, this pollutes training metrics. Roll back buffer entries added during eval. - -## Config Class - -Always create a custom config subclass with Pydantic Field descriptors. 
Key inherited fields you can tune: `enabled_toolsets`, `max_agent_turns`, `agent_temperature`, `system_prompt`, `terminal_backend`, `group_size`, `steps_per_eval`, `total_steps`. - -## config_init() — Default Configuration - -Classmethod returning `(YourEnvConfig, [APIServerConfig(...)])`. Set server_type to "openai" for OpenRouter/external APIs. Load API key from environment variable. - -## Three CLI Modes - -```bash -# SERVE — Full training loop (connects to Atropos API server) -python environments/my_env.py serve --openai.base_url http://localhost:8000/v1 - -# PROCESS — Offline data generation (saves JSONL) -python environments/my_env.py process --env.total_steps 10 --env.group_size 1 \ - --env.use_wandb false --env.data_path_to_save_groups output.jsonl \ - --openai.base_url "<BASE_URL>" \ - --openai.model_name "<MODEL>" \ - --openai.server_type <SERVER_TYPE> --openai.health_check false - -# EVALUATE — Standalone eval (runs setup + evaluate only) -python environments/my_env.py evaluate --env.eval_size 20 \ - --env.data_dir_to_save_evals /tmp/eval_results \ - --openai.base_url "<BASE_URL>" \ - --openai.model_name "<MODEL>" \ - --openai.server_type <SERVER_TYPE> --openai.health_check false -``` - -Config priority: CLI args > YAML file > config_init() defaults. - -## Common Pitfalls - -1. **AgentResult has .messages, not .final_response** — Extract the final response by iterating reversed(result.messages) looking for the last assistant message with content. - -2. **evaluate() must use HermesAgentLoop, not chat_completion** — Single-turn chat_completion has no tools. The whole point of hermes-agent benchmarks is agentic evaluation with tool use. - -3. **Don't call _llm_judge twice** — If compute_reward already calls it, extract the score from the buffer instead of calling judge separately in evaluate(). - -4. **Eval pollutes training buffers** — compute_reward appends to metric buffers. During eval, roll back buffer entries to keep training metrics clean. - -5.
**Always set health_check=false for OpenRouter** โ€” OpenRouter has no /health endpoint. - -6. **Set data_dir_to_save_evals in evaluate mode** โ€” Without it, results aren't saved. - -7. **default_toolsets class variable vs enabled_toolsets config** โ€” The class variable is a hint; the config field is what actually controls tool resolution. - -8. **Tool call parsing in messages** โ€” Tool calls are dicts with `{"function": {"name": ..., "arguments": ...}}`. Always check `isinstance(tc, dict)`. - -9. **ToolContext.cleanup()** โ€” Always call in a finally block to release sandbox resources. - -10. **server_type must be "openai" for external APIs** โ€” Without it, Atropos assumes a local VLLM server. - -11. **Always ask the user for their inference setup** โ€” Never hardcode or assume a specific provider/model. See the "Inference Setup" section above. - -## Reward Function Patterns - -### LLM Judge (for open-ended tasks) -Use `self.server.chat_completion()` with a scoring prompt. Parse JSON response for score float. Always include a heuristic fallback (keyword overlap) for when the judge call fails. - -### Binary Verification (for code/terminal tasks) -Use `ctx.terminal("pytest test.py -q")` to run tests in the agent's sandbox. Return 1.0 for pass, 0.0 for fail. - -### Multi-Signal (combine multiple indicators) -Weight correctness (0.6) + tool usage (0.2) + efficiency (0.2) + optional bonuses. Clamp to [0, 1]. - -## Testing Your Environment - -1. **Import test**: `python -c "from environments.my_env import MyEnv; print('OK')"` -2. **Ask the user for inference setup** (see "Inference Setup" section above) -3. **Process mode** (1 item): Verify JSONL output has valid tokens, masks, scores -4. **Evaluate mode**: Verify full agent loop runs with tools, metrics logged correctly -5. 
**Check reward range**: Scores should be in [0, 1], not all identical - -## Minimum Implementation Checklist - -```python -class MyEnv(HermesAgentBaseEnv): - name = "my-env" - env_config_cls = MyEnvConfig - - @classmethod - def config_init(cls): ... # Default server + env config - async def setup(self): ... # Load dataset + train/eval split - async def get_next_item(self): ... # Cycle through training items - def format_prompt(self, item): ... # Item โ†’ user message string - async def compute_reward(self, item, result, ctx): ... # Score rollout - async def evaluate(self, *args, **kwargs): ... # Full agent loop eval - async def wandb_log(self, metrics=None): ... # Custom metrics + super() - -if __name__ == "__main__": - MyEnv.cli() -``` diff --git a/optional-skills/mlops/hermes-atropos-environments/references/agentresult-fields.md b/optional-skills/mlops/hermes-atropos-environments/references/agentresult-fields.md deleted file mode 100644 index bc6d6050581c..000000000000 --- a/optional-skills/mlops/hermes-atropos-environments/references/agentresult-fields.md +++ /dev/null @@ -1,59 +0,0 @@ -# AgentResult Fields Reference - -`AgentResult` is defined in `environments/agent_loop.py` as a dataclass. 
- -## Fields - -| Field | Type | Description | -|-------|------|-------------| -| `messages` | `List[Dict[str, Any]]` | Full conversation history in OpenAI message format | -| `managed_state` | `Optional[Dict]` | ManagedServer.get_state() if Phase 2, else None | -| `turns_used` | `int` | Number of LLM calls made during the loop | -| `finished_naturally` | `bool` | True if model stopped calling tools on its own | -| `reasoning_per_turn` | `List[Optional[str]]` | Extracted reasoning content per turn | -| `tool_errors` | `List[ToolError]` | Tool errors encountered during the loop | - -## ToolError Fields - -| Field | Type | Description | -|-------|------|-------------| -| `turn` | `int` | Which turn the error occurred | -| `tool_name` | `str` | Name of the tool that failed | -| `arguments` | `str` | Arguments passed to the tool | -| `error` | `str` | Error message | -| `tool_result` | `str` | The result returned to the model | - -## Extracting Data from Messages - -Messages follow OpenAI format. 
Common patterns: - -```python -# Get final assistant response -for msg in reversed(result.messages): - if msg.get("role") == "assistant" and msg.get("content"): - final_response = msg["content"] - break - -# Get all tool names used -tools = [] -for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - fn = tc.get("function", {}) if isinstance(tc, dict) else {} - tools.append(fn.get("name", "")) - -# Get tool results -for msg in result.messages: - if msg.get("role") == "tool": - tool_output = msg.get("content", "") - call_id = msg.get("tool_call_id", "") -``` - -## Fields that DO NOT EXIST - -These are common mistakes โ€” AgentResult does NOT have: -- `final_response` โ€” extract from messages -- `tool_calls` โ€” extract from messages -- `tools_used` โ€” extract from messages -- `output` โ€” extract from messages -- `response` โ€” extract from messages diff --git a/optional-skills/mlops/hermes-atropos-environments/references/atropos-base-env.md b/optional-skills/mlops/hermes-atropos-environments/references/atropos-base-env.md deleted file mode 100644 index e76895905e1d..000000000000 --- a/optional-skills/mlops/hermes-atropos-environments/references/atropos-base-env.md +++ /dev/null @@ -1,65 +0,0 @@ -# Atropos BaseEnv Reference - -Source: `atroposlib/envs/base.py` (~2124 lines) - -## Abstract Methods (MUST implement) - -| Method | Signature | Description | -|--------|-----------|-------------| -| `get_next_item()` | `async def get_next_item(self) -> Item` | Return next item for trajectory. Return None to pause. | -| `evaluate()` | `async def evaluate(self, *args, **kwargs)` | Called every steps_per_eval steps. | -| `setup()` | `async def setup(self)` | Called once at start. Load datasets, init models. | -| `collect_trajectory()` | `async def collect_trajectory(self, item) -> Tuple[Optional[ScoredDataItem], List[Item]]` | Single rollout. Or override collect_trajectories instead. 
| - -## Overridable Methods - -| Method | Default Behavior | Override When | -|--------|-----------------|---------------| -| `collect_trajectories()` | Runs collect_trajectory group_size times in parallel | Batch generation, MCTS, coupled rollouts | -| `wandb_log()` | Logs completion lengths, rollout table, perf stats | Add custom metrics (always call super) | -| `config_init()` | Returns (env_config_cls(), ServerBaseline()) | Custom defaults + server configs | -| `postprocess_histories()` | Passthrough | Final processing before sending to trainer | -| `save_checkpoint()` | Saves JSON to checkpoint_dir | Custom serialization | -| `cleanup()` | No-op | Release resources after each rollout | - -## ScoredDataGroup Structure - -```python -ScoredDataGroup = TypedDict with: - tokens: List[List[int]] # Token IDs per rollout - masks: List[List[int]] # -100=prompt, token_id=completion - scores: List[float] # Score per rollout - advantages: Optional[...] # Per-token advantages - ref_logprobs: Optional[...] # Reference model logprobs - messages: Optional[...] # OpenAI-format messages - inference_logprobs: Optional[...] 
# Inference logprobs -``` - -## BaseEnvConfig Key Fields - -| Field | Default | Description | -|-------|---------|-------------| -| `group_size` | 4 | Responses grouped for scoring | -| `steps_per_eval` | 100 | Steps between evaluations | -| `max_token_length` | 2048 | Max token length for generations | -| `total_steps` | 1000 | Total training steps | -| `use_wandb` | True | Enable wandb logging | -| `tokenizer_name` | DeepHermes-3 | Tokenizer for token encoding | -| `ensure_scores_are_not_same` | True | Skip groups with identical scores | -| `worker_timeout` | 600 | Task timeout seconds | - -## Data Flow - -``` -env_manager() → add_train_workers() → handle_env() - → collect_trajectories() → postprocess_histories() - → handle_send_to_api() → training server -``` - -## Atropos Environment Statistics (82 environments analyzed) - -- 95% implement setup, collect_trajectories, evaluate, get_next_item -- 76% override wandb_log -- 54% have custom config class -- Most use collect_trajectories (plural), not collect_trajectory (singular) -- Common reward patterns: LLM-judge (~40), regex-extract (~35), code-exec (~12) diff --git a/optional-skills/mlops/hermes-atropos-environments/references/usage-patterns.md b/optional-skills/mlops/hermes-atropos-environments/references/usage-patterns.md deleted file mode 100644 index 5d4b3c1e8203..000000000000 --- a/optional-skills/mlops/hermes-atropos-environments/references/usage-patterns.md +++ /dev/null @@ -1,199 +0,0 @@ -# Usage Patterns — Testing Environments and Evaluating Models - -## Pattern 1: Test Your Environment Works (process mode) - -Use `process` mode to verify your environment runs end-to-end before -committing. This generates trajectories without needing an Atropos -training server. - -**Before running:** Ask the user for their inference setup (see SKILL.md "Inference Setup" section). Replace `<BASE_URL>`, `<MODEL>`, and `<SERVER_TYPE>` below with their chosen values.
- -### Step 1: Run 1 trajectory - -```bash -cd ~/.hermes/hermes-agent -source venv/bin/activate - -python environments/your_env.py process \ - --env.total_steps 1 \ - --env.group_size 1 \ - --env.use_wandb false \ - --env.data_path_to_save_groups /tmp/test_output.jsonl \ - --openai.base_url "" \ - --openai.model_name "" \ - --openai.server_type \ - --openai.health_check false -``` - -### Step 2: Verify the output - -```python -import json -for line in open("/tmp/test_output.jsonl"): - data = json.loads(line) - print(f"Scores: {data.get('scores', [])}") - print(f"Token sequences: {len(data.get('tokens', []))}") - # Check messages include tool calls - for msg_list in data.get("messages", []): - roles = [m.get("role") for m in msg_list] - print(f"Roles: {roles}") - for m in reversed(msg_list): - if m.get("role") == "assistant" and m.get("content"): - print(f"Response: {m['content'][:200]}...") - break -``` - -### What to check: -- **Scores are not all 0.0** — if so, compute_reward is broken -- **Scores are in [0, 1]** — not negative, not >1 -- **Messages include "tool" role entries** — agent used tools -- **Token sequences are non-empty** -- **An HTML visualization is generated** next to the .jsonl - -### Common failures: -- `'AgentResult' object has no attribute 'X'` — accessing a field that doesn't exist. See agentresult-fields.md. -- Score always 0.0 — reward function erroring silently -- Score always 1.0 — verification too lenient or not running - - -## Pattern 2: Evaluate a Model (evaluate mode) - -Use `evaluate` mode to benchmark a model on your environment's eval -split. This runs the full agent loop with tools for each eval item. 
- -### Step 1: Run evaluation - -```bash -python environments/your_env.py evaluate \ - --env.eval_size 20 \ - --env.use_wandb false \ - --env.data_dir_to_save_evals /tmp/eval_results \ - --openai.base_url "" \ - --openai.model_name "" \ - --openai.server_type \ - --openai.health_check false -``` - -### Step 2: Read results - -Stdout shows a lighteval-compatible table: - -``` -Evaluation Results: your-env_eval -|Metric | Value| -|mean correctness| 0.850 | -|mean reward | 0.920 | -|mean tool calls | 4.300 | -|n items | 20 | -Evaluation completed in 367 seconds -``` - -JSON results saved to the eval directory: - -```python -import json -data = json.load(open("/tmp/eval_results/metrics.json")) -for metric, value in data["results"]["all"].items(): - print(f"{metric}: {value}") -``` - -### Step 3: Compare models - -Run evaluate with different models and compare the metrics.json files. - -### What to check: -- **"data_dir_to_save_evals is not set"** — you forgot the flag, results won't be saved -- **Tool usage rate = 0** — evaluate() is using chat_completion instead of HermesAgentLoop -- **All scores identical** — judge failing, falling back to heuristic -- **Very slow** — each item runs a full agent loop (~30-90s). Use `--env.eval_size 5` for quick checks. 
- - -## Pattern 3: Generate Training Data (process mode, larger scale) - -Generate trajectory data for offline training or analysis: - -```bash -python environments/your_env.py process \ - --env.total_steps 50 \ - --env.group_size 4 \ - --env.use_wandb false \ - --env.data_path_to_save_groups data/trajectories.jsonl \ - --openai.base_url "" \ - --openai.model_name "" \ - --openai.server_type \ - --openai.health_check false -``` - -### Analyze the distribution: - -```python -import json -scores = [] -for line in open("data/trajectories.jsonl"): - data = json.loads(line) - scores.extend(data.get("scores", [])) - -print(f"Total: {len(scores)}, Mean: {sum(scores)/len(scores):.3f}") -for bucket in [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]: - count = sum(1 for s in scores if abs(s - bucket) < 0.1) - print(f" {bucket:.1f}: {'█' * count} ({count})") -``` - -### What to check: -- **Score distribution has variance** — RL needs score variance. All-same scores are useless. - - -## Pattern 4: Full RL Training (serve mode) - -For actual RL training with Atropos: - -```bash -# Terminal 1: Start Atropos API server -run-api - -# Terminal 2: Start your environment -python environments/your_env.py serve \ - --config environments/your_env/default.yaml -``` - -For Phase 2 with VLLM: - -```bash -# Terminal 1: VLLM server -python -m vllm.entrypoints.openai.api_server --model your-model --port 8000 - -# Terminal 2: Atropos API -run-api - -# Terminal 3: Environment -python environments/your_env.py serve \ - --openai.base_url http://localhost:8000/v1 \ - --openai.model_name your-model \ - --openai.server_type vllm -``` - - -## Pattern 5: Quick Smoke Test - -Verify imports and config before spending money on API calls: - -```python -from environments.your_env import YourEnv -print(f"Name: {YourEnv.name}") -cfg, servers = YourEnv.config_init() -print(f"Toolsets: {cfg.enabled_toolsets}") -print(f"Server: {servers[0].model_name}") -print("All imports OK") -``` - - -## Timing Expectations - -| Mode | 
Items | Time per item | Total | -|------|-------|--------------|-------| -| process (1 item) | 1 | 30-90s | ~1 min | -| evaluate (5 items) | 5 | 30-90s | ~5 min | -| evaluate (20 items) | 20 | 30-90s | ~15-30 min | -| process (50 items) | 50 | 30-90s | ~30-75 min | - -Times are for cloud APIs with Claude Sonnet-class models. Local models may be faster or slower depending on hardware. diff --git a/plugins/observability/langfuse/__init__.py b/plugins/observability/langfuse/__init__.py index 9c9583261a6f..8516030fb019 100644 --- a/plugins/observability/langfuse/__init__.py +++ b/plugins/observability/langfuse/__init__.py @@ -47,6 +47,7 @@ class TraceState: root_span: Any generations: Dict[str, Any] = field(default_factory=dict) tools: Dict[str, Any] = field(default_factory=dict) + pending_tools_by_name: Dict[str, list] = field(default_factory=dict) turn_tool_calls: list[dict[str, Any]] = field(default_factory=list) last_updated_at: float = field(default_factory=time.time) @@ -58,6 +59,17 @@ class TraceState: _READ_FILE_HEAD_LINES = 25 _READ_FILE_TAIL_LINES = 15 +# Langfuse-issued keys always carry these prefixes (cloud or self-hosted โ€” +# the prefix is baked into the server-side issuance flow, not a UI hint). +# Anything else (`placeholder`, `test-key`, `your-langfuse-key`, etc.) is a +# leftover template value and would cause the SDK to silently accept the +# credentials at construction time but drop every trace at flush time. +# See #23823 โ€” the silent-failure bug this guard fixes. +_LANGFUSE_KEY_PREFIXES: Dict[str, str] = { + "HERMES_LANGFUSE_PUBLIC_KEY": "pk-lf-", + "HERMES_LANGFUSE_SECRET_KEY": "sk-lf-", +} + def _env(name: str, default: str = "") -> str: return os.environ.get(name, default).strip() @@ -82,10 +94,49 @@ def _debug(message: str) -> None: # Sentinel: "_get_langfuse() has tried and failed". Lets us short-circuit # every subsequent hook call without re-checking env vars or re-attempting -# SDK init. Cleared by reset_cache_for_tests(). +# SDK init. 
Tests clear this by reloading the module via +# ``sys.modules.pop(...) + importlib.import_module(...)`` rather than via a +# dedicated reset function. Runtime callers cannot reset the cache; if an +# operator fixes a misconfigured credential they must restart the process. _INIT_FAILED = object() +def _redact_key_preview(value: str) -> str: + """Return a brief, log-safe preview of a credential value. + + Keeps enough characters to disambiguate common placeholders + (``placeholder``, ``test-key``, ``your-key``) without echoing a + real secret in full if an operator pasted one into the wrong env + var. Used only for the once-per-process placeholder-detection + warning in :func:`_get_langfuse`. + """ + if not value: + return "" + if len(value) <= 12: + return repr(value) + return repr(value[:6] + "...") + + +def _validate_langfuse_key(env_name: str, value: str) -> Optional[str]: + """Return an error message if ``value`` is not a real Langfuse key. + + Returns ``None`` when the value matches the documented Langfuse + prefix for ``env_name``, or when no prefix is registered for the + name (in which case we trust the operator). When validation + fails the returned string is suitable for direct inclusion in a + single log line โ€” it names the env var and shows a safe preview. + """ + expected = _LANGFUSE_KEY_PREFIXES.get(env_name, "") + if not expected: + return None + if value.startswith(expected): + return None + return ( + f"{env_name}={_redact_key_preview(value)} " + f"(expected {expected!r} prefix)" + ) + + def _get_langfuse() -> Optional[Langfuse]: """Return a cached Langfuse client, or ``None`` if unavailable. @@ -111,6 +162,33 @@ def _get_langfuse() -> Optional[Langfuse]: _LANGFUSE_CLIENT = _INIT_FAILED return None + # Reject placeholder credentials with a one-shot warning so the + # operator sees the misconfiguration instead of silently shipping a + # broken observability stack (#23823). 
The SDK does not validate + # keys at construction time โ€” it queues traces in memory and only + # discovers the auth failure when the background flush thread tries + # to post them, by which point the warning is buried under whatever + # else the process is logging. Catch it here, surface it once, and + # short-circuit via the same _INIT_FAILED path as the empty case. + placeholder_issues = [ + msg + for msg in ( + _validate_langfuse_key("HERMES_LANGFUSE_PUBLIC_KEY", public_key), + _validate_langfuse_key("HERMES_LANGFUSE_SECRET_KEY", secret_key), + ) + if msg + ] + if placeholder_issues: + logger.warning( + "Langfuse plugin: credentials look like placeholders, traces will " + "NOT be emitted (%s). Set real Langfuse keys (pk-lf-... / sk-lf-...) " + "or unset HERMES_LANGFUSE_PUBLIC_KEY / HERMES_LANGFUSE_SECRET_KEY to " + "silence this warning.", + "; ".join(placeholder_issues), + ) + _LANGFUSE_CLIENT = _INIT_FAILED + return None + base_url = _env("HERMES_LANGFUSE_BASE_URL") or _env("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com" environment = _env("HERMES_LANGFUSE_ENV") or _env("LANGFUSE_ENV") release = _env("HERMES_LANGFUSE_RELEASE") or _env("LANGFUSE_RELEASE") @@ -328,6 +406,21 @@ def _extract_last_user_message(messages: Any) -> Any: return None +def _coerce_request_messages( + *, + request_messages: Any = None, + messages: Any = None, + conversation_history: Any = None, + user_message: Any = None, +) -> list[dict[str, Any]]: + for candidate in (request_messages, messages, conversation_history): + if isinstance(candidate, list): + return candidate + if user_message is None: + return [] + return [{"role": "user", "content": user_message}] + + def _serialize_messages(messages: Any) -> list[dict[str, Any]]: if not isinstance(messages, list): return [] @@ -343,8 +436,11 @@ def _serialize_messages(messages: Any) -> list[dict[str, Any]]: parse_json_strings=(role == "tool"), ), } - if role == "tool" and message.get("tool_call_id"): - item["tool_call_id"] = 
message.get("tool_call_id") + if role == "tool": + if message.get("tool_call_id"): + item["tool_call_id"] = message.get("tool_call_id") + if message.get("name"): + item["name"] = _safe_value(message.get("name")) if message.get("tool_calls"): item["tool_calls"] = _safe_value(message.get("tool_calls"), parse_json_strings=True) serialized.append(item) @@ -359,15 +455,16 @@ def _serialize_tool_calls(tool_calls: Any) -> list[dict[str, Any]]: fn = getattr(tool_call, "function", None) name = getattr(fn, "name", None) if fn else None arguments = getattr(fn, "arguments", None) if fn else None - if isinstance(arguments, str): - try: - arguments = json.loads(arguments) - except Exception: - pass + safe_arguments = _safe_value(arguments, parse_json_strings=False) serialized.append({ "id": getattr(tool_call, "id", None), + "type": getattr(tool_call, "type", None) or "function", "name": name, - "arguments": _safe_value(arguments, parse_json_strings=True), + "arguments": safe_arguments, + "function": { + "name": name, + "arguments": safe_arguments, + }, }) return serialized @@ -564,6 +661,9 @@ def _finish_trace(task_key: str, *, output: Any = None) -> None: _end_observation(observation) for observation in state.tools.values(): _end_observation(observation) + for queue in state.pending_tools_by_name.values(): + for observation in queue: + _end_observation(observation) final_output = _merge_trace_output(output, state) if final_output is not None: state.root_span.set_trace_io(output=final_output) @@ -636,6 +736,7 @@ def on_pre_llm_request( base_url: str = "", api_mode: str = "", api_call_count: int = 0, + request_messages: Any = None, messages: Any = None, turn_type: str = "user", message_count: int = 0, @@ -643,12 +744,21 @@ def on_pre_llm_request( approx_input_tokens: int = 0, request_char_count: int = 0, max_tokens: Any = None, + conversation_history: Any = None, + user_message: Any = None, **_: Any, ) -> None: client = _get_langfuse() if client is None: return + input_messages = 
_coerce_request_messages( + request_messages=request_messages, + messages=messages, + conversation_history=conversation_history, + user_message=user_message, + ) + task_key = _trace_key(task_id, session_id) req_key = _request_key(api_call_count) @@ -663,7 +773,7 @@ def on_pre_llm_request( provider=provider, model=model, api_mode=api_mode, - messages=messages, + messages=input_messages, client=client, ) _TRACE_STATE[task_key] = state @@ -676,7 +786,7 @@ def on_pre_llm_request( client=client, name=f"LLM call {api_call_count}", as_type="generation", - input_value=_serialize_messages(messages), + input_value=_serialize_messages(input_messages), metadata={ "provider": provider, "platform": platform, @@ -815,13 +925,12 @@ def on_pre_tool_call(*, tool_name: str = "", args: Any = None, task_id: str = "" return task_key = _trace_key(task_id, session_id) - tool_key = tool_call_id or f"{tool_name}:{time.time_ns()}" with _STATE_LOCK: state = _TRACE_STATE.get(task_key) if state is None: return - state.tools[tool_key] = _start_child_observation( + observation = _start_child_observation( state, client=client, name=f"Tool: {tool_name}", @@ -829,22 +938,29 @@ def on_pre_tool_call(*, tool_name: str = "", args: Any = None, task_id: str = "" input_value=_safe_value(args), metadata={"tool_name": tool_name, "tool_call_id": tool_call_id}, ) + if tool_call_id: + state.tools[tool_call_id] = observation + else: + state.pending_tools_by_name.setdefault(tool_name, []).append(observation) def on_post_tool_call(*, tool_name: str = "", args: Any = None, result: Any = None, task_id: str = "", session_id: str = "", tool_call_id: str = "", **_: Any) -> None: task_key = _trace_key(task_id, session_id) - tool_key = tool_call_id or "" observation = None with _STATE_LOCK: state = _TRACE_STATE.get(task_key) if state is None: return - if tool_key: - observation = state.tools.pop(tool_key, None) - elif state.tools: - _, observation = state.tools.popitem() + if tool_call_id: + observation = 
state.tools.pop(tool_call_id, None) + if observation is None: + queue = state.pending_tools_by_name.get(tool_name) + if queue: + observation = queue.pop(0) + if not queue: + state.pending_tools_by_name.pop(tool_name, None) if observation is None: return @@ -854,10 +970,24 @@ def on_post_tool_call(*, tool_name: str = "", args: Any = None, result: Any = No else: result_value = result result_value = _normalize_payload(result_value, tool_name=tool_name, args=args) + safe_result_value = _safe_value(result_value, parse_json_strings=True) + + # Backfill so the generation's tool_call record carries the result alongside arguments. + if tool_call_id: + with _STATE_LOCK: + state = _TRACE_STATE.get(task_key) + if state is not None: + for tool_call in reversed(state.turn_tool_calls): + if tool_call.get("id") == tool_call_id: + tool_call["output"] = safe_result_value + function_payload = tool_call.get("function") + if isinstance(function_payload, dict): + function_payload["output"] = safe_result_value + break _end_observation( observation, - output=_safe_value(result_value, parse_json_strings=True), + output=safe_result_value, metadata={"tool_name": tool_name, "args": _safe_value(args, parse_json_strings=True)}, ) diff --git a/plugins/platforms/simplex/__init__.py b/plugins/platforms/simplex/__init__.py new file mode 100644 index 000000000000..d4f1d7bf0e3f --- /dev/null +++ b/plugins/platforms/simplex/__init__.py @@ -0,0 +1,3 @@ +from .adapter import register + +__all__ = ["register"] diff --git a/plugins/platforms/simplex/adapter.py b/plugins/platforms/simplex/adapter.py new file mode 100644 index 000000000000..b568f29bbb5e --- /dev/null +++ b/plugins/platforms/simplex/adapter.py @@ -0,0 +1,746 @@ +"""SimpleX Chat platform adapter (Hermes plugin). + +Connects to a simplex-chat daemon running in WebSocket mode. +Inbound messages arrive via a persistent WebSocket connection. +Outbound messages use the same WebSocket with JSON commands. 
+ +This adapter ships as a Hermes platform plugin under +``plugins/platforms/simplex/``. The Hermes plugin loader scans the +directory at startup, calls ``register(ctx)``, and the platform +becomes available to ``gateway/run.py`` and ``tools/send_message_tool`` +through the registry โ€” no edits to core files are required. + +SimpleX chat daemon setup: + simplex-chat -p 5225 # start daemon on port 5225 + # or via Docker: + # docker run -p 5225:5225 simplexchat/simplex-chat-cli -p 5225 + +Required environment variables: + SIMPLEX_WS_URL WebSocket URL of the daemon + (default: ws://127.0.0.1:5225) + +Optional environment variables: + SIMPLEX_ALLOWED_USERS Comma-separated contact IDs (allowlist) + SIMPLEX_ALLOW_ALL_USERS Set 'true' to allow all contacts + SIMPLEX_HOME_CHANNEL Default contact/group ID for cron delivery + SIMPLEX_HOME_CHANNEL_NAME Human label for the home channel + +The ``websockets`` Python package is imported lazily โ€” the plugin is +discoverable and `hermes setup` can describe it even when websockets is +not installed. ``check_requirements()`` returns False until the package +is present, so the gateway will not attempt to instantiate the adapter. +""" + +import asyncio +import json +import logging +import os +import random +import time +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + +# Lazy import: BasePlatformAdapter and friends live in the main repo. +# Imported at module top because they're stdlib-only inside Hermes โ€” no +# external dependency that would block the plugin from loading. 
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
    BasePlatformAdapter,
    MessageEvent,
    MessageType,
    SendResult,
    cache_image_from_bytes,
    cache_audio_from_bytes,
    cache_document_from_bytes,
)

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
MAX_MESSAGE_LENGTH = 16_000  # SimpleX has no hard limit; keep chunking sane
TYPING_INTERVAL = 10.0
WS_RETRY_DELAY_INITIAL = 2.0
WS_RETRY_DELAY_MAX = 60.0
HEALTH_CHECK_INTERVAL = 30.0
HEALTH_CHECK_STALE_THRESHOLD = 120.0

# Correlation ID prefix for requests we send so we can ignore our own echoes.
_CORR_PREFIX = "hermes-"


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _parse_comma_list(value: str) -> List[str]:
    """Split a comma-separated string into a list of stripped, non-empty items."""
    return [v.strip() for v in value.split(",") if v.strip()]


def _guess_extension(data: bytes) -> str:
    """Guess a file extension from the leading magic bytes of ``data``.

    Args:
        data: Raw file content; only the first 12 bytes are inspected.

    Returns:
        One of ``.png``, ``.jpg``, ``.gif``, ``.webp``, ``.wav``, ``.pdf``,
        ``.m4a``, ``.mp4``, ``.ogg``, ``.mp3``, or ``.bin`` when no signature
        matches (including empty input).
    """
    if data[:4] == b"\x89PNG":
        return ".png"
    if data[:2] == b"\xff\xd8":
        return ".jpg"
    if data[:4] == b"GIF8":
        return ".gif"
    # RIFF is a container: bytes 8-12 carry the form type that tells WebP
    # images apart from WAVE audio.
    if len(data) >= 12 and data[:4] == b"RIFF":
        if data[8:12] == b"WEBP":
            return ".webp"
        if data[8:12] == b"WAVE":
            return ".wav"
    if data[:4] == b"%PDF":
        return ".pdf"
    if len(data) >= 8 and data[4:8] == b"ftyp":
        # The major brand follows "ftyp"; "M4A " marks an audio-only file.
        # Everything else in the ISO-BMFF family keeps the original ".mp4".
        return ".m4a" if data[8:12] == b"M4A " else ".mp4"
    if data[:4] == b"OggS":
        return ".ogg"
    # MP3 files usually start with an ID3v2 tag rather than a raw frame, so
    # check the tag header before falling back to the frame-sync bits.
    if data[:3] == b"ID3":
        return ".mp3"
    if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0:
        return ".mp3"
    return ".bin"


def _is_image_ext(ext: str) -> bool:
    """Return True when ``ext`` (with leading dot, any case) is an image type."""
    return ext.lower() in (".jpg", ".jpeg", ".png", ".gif", ".webp")


def _is_audio_ext(ext: str) -> bool:
    """Return True when ``ext`` (with leading dot, any case) is an audio type."""
    return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac")


# ---------------------------------------------------------------------------
#
# SimpleX Adapter
# ---------------------------------------------------------------------------

class SimplexAdapter(BasePlatformAdapter):
    """SimpleX Chat adapter using the simplex-chat daemon WebSocket API.

    Instantiated by the ``adapter_factory`` passed to
    ``ctx.register_platform()`` in :func:`register`.

    A listener task keeps one WebSocket open to the daemon and turns inbound
    chat items into ``MessageEvent`` objects; a health-monitor task closes
    the socket when it has been silent too long so the listener reconnects.
    Outbound commands are written to the listener's socket.
    """

    def __init__(self, config: PlatformConfig, **kwargs):
        """Read ``ws_url`` from ``config.extra`` and initialise idle state."""
        platform = Platform("simplex")
        super().__init__(config=config, platform=platform)

        extra = getattr(config, "extra", {}) or {}
        self.ws_url = extra.get("ws_url", "ws://127.0.0.1:5225").rstrip("/")

        # Running state
        self._ws = None  # websockets connection
        self._ws_task: Optional[asyncio.Task] = None
        self._health_task: Optional[asyncio.Task] = None
        self._typing_tasks: Dict[str, asyncio.Task] = {}
        self._running = False
        self._last_ws_activity = 0.0

        # Track sent correlation IDs to filter echoes
        self._pending_corr_ids: set = set()
        self._max_pending_corr = 200

        logger.info("SimpleX adapter initialized: url=%s", self.ws_url)

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    async def connect(self) -> bool:
        """Probe the daemon, then start the listener and health-monitor tasks.

        Returns:
            ``True`` once both background tasks are scheduled; ``False`` when
            ``websockets`` is not installed, no URL is configured, or the
            probe connection fails.
        """
        try:
            import websockets
        except ImportError:
            logger.error(
                "SimpleX: 'websockets' package not installed. "
                "Run: pip install websockets"
            )
            return False

        if not self.ws_url:
            logger.error("SimpleX: SIMPLEX_WS_URL is required")
            return False

        # Quick connectivity check — open and immediately close a socket so a
        # bad URL is reported here rather than inside the retry loop.
        try:
            async with websockets.connect(self.ws_url, open_timeout=10):
                pass
        except Exception as e:
            logger.error("SimpleX: cannot reach daemon at %s: %s", self.ws_url, e)
            return False

        self._running = True
        self._last_ws_activity = time.time()
        self._ws_task = asyncio.create_task(self._ws_listener())
        self._health_task = asyncio.create_task(self._health_monitor())

        logger.info("SimpleX: connected to %s", self.ws_url)
        return True

    async def disconnect(self) -> None:
        """Stop both background tasks, cancel typing tasks and close the socket."""
        self._running = False

        # Listener first, then the health monitor; each is awaited so its
        # cancellation has completed before we touch the socket.
        for task in (self._ws_task, self._health_task):
            if task:
                task.cancel()
                try:
                    await task
                except asyncio.CancelledError:
                    pass

        for task in self._typing_tasks.values():
            task.cancel()
        self._typing_tasks.clear()

        if self._ws:
            try:
                await self._ws.close()
            except Exception:
                pass  # best-effort: the socket may already be gone
            self._ws = None

        logger.info("SimpleX: disconnected")

    # ------------------------------------------------------------------
    # WebSocket listener
    # ------------------------------------------------------------------

    async def _ws_listener(self) -> None:
        """Maintain a persistent WebSocket connection to the daemon.

        Reconnects with exponential backoff plus up to 20% jitter, capped at
        ``WS_RETRY_DELAY_MAX``; the delay resets after each successful
        connect.  Runs until ``self._running`` is cleared or the task is
        cancelled.
        """
        import websockets

        backoff = WS_RETRY_DELAY_INITIAL

        while self._running:
            try:
                logger.debug("SimpleX WS: connecting to %s", self.ws_url)
                async with websockets.connect(
                    self.ws_url,
                    ping_interval=20,
                    ping_timeout=20,
                ) as ws:
                    self._ws = ws
                    backoff = WS_RETRY_DELAY_INITIAL
                    self._last_ws_activity = time.time()
                    logger.info("SimpleX WS: connected")

                    async for raw in ws:
                        if not self._running:
                            break
                        self._last_ws_activity = time.time()
                        # One bad frame must not tear down the connection.
                        try:
                            msg = json.loads(raw)
                            await self._handle_event(msg)
                        except json.JSONDecodeError:
                            logger.debug("SimpleX WS: invalid JSON: %.100s", raw)
                        except Exception:
                            logger.exception("SimpleX WS: error handling event")

            except asyncio.CancelledError:
                break
            except websockets.WebSocketException as e:
                if self._running:
                    logger.warning(
                        "SimpleX WS: error: %s (reconnecting in %.0fs)", e, backoff
                    )
            except Exception as e:
                if self._running:
                    logger.warning(
                        "SimpleX WS: unexpected error: %s (reconnecting in %.0fs)",
                        e, backoff,
                    )
            finally:
                self._ws = None

            if self._running:
                jitter = backoff * 0.2 * random.random()
                await asyncio.sleep(backoff + jitter)
                backoff = min(backoff * 2, WS_RETRY_DELAY_MAX)

    # ------------------------------------------------------------------
    # Health monitor
    # ------------------------------------------------------------------

    async def _health_monitor(self) -> None:
        """Force reconnect if the WebSocket has been idle too long."""
        while self._running:
            await asyncio.sleep(HEALTH_CHECK_INTERVAL)
            if not self._running:
                break

            elapsed = time.time() - self._last_ws_activity
            if elapsed > HEALTH_CHECK_STALE_THRESHOLD:
                logger.warning(
                    "SimpleX: WS idle for %.0fs, forcing reconnect", elapsed
                )
                # Reset the clock so we do not close again on the next tick
                # while the listener is still reconnecting.
                self._last_ws_activity = time.time()
                if self._ws:
                    try:
                        await self._ws.close()
                    except Exception:
                        pass

    # ------------------------------------------------------------------
    # Inbound event handling
    # ------------------------------------------------------------------

    async def _handle_event(self, event: dict) -> None:
        """Dispatch a daemon event to the appropriate handler.

        The event type is looked up at the top level first and then under
        ``resp``; chat items are read from whichever of the two carries them,
        so both layouts are handled the same way.
        """
        if not isinstance(event, dict):
            return

        # Filter responses to our own commands (echoes)
        corr_id = event.get("corrId", "")
        if isinstance(corr_id, str) and corr_id.startswith(_CORR_PREFIX):
            self._pending_corr_ids.discard(corr_id)
            return

        resp = event.get("resp")
        nested = resp if isinstance(resp, dict) else {}
        resp_type = event.get("type") or nested.get("type", "")

        if resp_type == "newChatItem":
            wrapper = event
            if nested and not (event.get("chatItem") or event.get("item")):
                wrapper = nested
            await self._handle_new_chat_item(wrapper)
        elif resp_type == "newChatItems":
            # Batch variant — process each item
            items = event.get("chatItems") or nested.get("chatItems") or []
            for item_wrapper in items:
                if isinstance(item_wrapper, dict):
                    await self._handle_new_chat_item(item_wrapper)
        # Ignore all other event types (delivery receipts, contact updates, etc.)

    async def _handle_new_chat_item(self, wrapper: dict) -> None:
        """Process a single newChatItem event into a MessageEvent."""
        # The daemon wraps the chat item differently depending on version;
        # normalise both layouts.
        chat_info = wrapper.get("chatInfo") or wrapper.get("chat") or {}
        chat_item = wrapper.get("chatItem") or wrapper.get("item") or {}

        # Only process messages (not calls, deleted items, etc.)
        item_content = chat_item.get("content") or {}
        msg_content = item_content.get("msgContent") or {}
        if not msg_content:
            return

        # Filter out messages sent by us.  Every outbound status type starts
        # with "snd", so match the prefix instead of enumerating variants.
        meta = chat_item.get("meta") or {}
        status = str((meta.get("itemStatus") or {}).get("type") or "")
        if status.startswith("snd"):
            return

        # Determine chat type and IDs
        chat_type_raw = chat_info.get("type", "")
        is_group = chat_type_raw in ("group", "groupInfo")

        if is_group:
            group_info = chat_info.get("groupInfo") or chat_info.get("group") or {}
            group_id = str(group_info.get("groupId") or group_info.get("id") or "")
            group_name = (
                group_info.get("displayName")
                or (group_info.get("groupProfile") or {}).get("displayName", "")
            )
            chat_id = f"group:{group_id}" if group_id else ""
            chat_name = group_name
        else:
            contact_info = chat_info.get("contact") or {}
            contact_id = str(contact_info.get("contactId") or contact_info.get("id") or "")
            contact_name = (
                contact_info.get("displayName")
                or contact_info.get("localDisplayName")
                or contact_id
            )
            chat_id = contact_id
            chat_name = contact_name

        if not chat_id:
            logger.debug("SimpleX: ignoring event with no chat_id")
            return

        # Sender — for groups the message includes a chatItemMember sub-object
        member = chat_item.get("chatItemMember") or {}
        if is_group and member:
            sender_id = str(member.get("memberId") or member.get("id") or chat_id)
            sender_name = (
                member.get("displayName")
                or member.get("localDisplayName")
                or sender_id
            )
        else:
            sender_id = chat_id
            sender_name = chat_name

        # Extract text
        text = msg_content.get("text") or ""

        # Media attachments
        media_urls: List[str] = []
        media_types: List[str] = []
        file_info = chat_item.get("file") or {}
        if file_info and file_info.get("fileStatus") not in ("cancelled", "error"):
            file_id = file_info.get("fileId")
            file_name = file_info.get("fileName", "file")
            if file_id:
                try:
                    cached = await self._fetch_file(file_id, file_name)
                    if cached:
                        ext = cached.rsplit(".", 1)[-1]
                        if _is_image_ext("." + ext):
                            media_types.append("image/" + ext.replace("jpg", "jpeg"))
                        elif _is_audio_ext("." + ext):
                            media_types.append("audio/" + ext)
                        else:
                            media_types.append("application/octet-stream")
                        media_urls.append(cached)
                except Exception:
                    logger.exception("SimpleX: failed to fetch file %s", file_id)

        # Timestamp
        ts_str = meta.get("itemTs") or meta.get("createdAt") or ""
        try:
            timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
        except (ValueError, AttributeError):
            timestamp = datetime.now(tz=timezone.utc)

        # Build source
        source = self.build_source(
            chat_id=chat_id,
            chat_name=chat_name,
            chat_type="group" if is_group else "dm",
            user_id=sender_id,
            user_name=sender_name,
        )

        # Message type
        msg_type = MessageType.TEXT
        if media_types:
            if any(mt.startswith("audio/") for mt in media_types):
                msg_type = MessageType.VOICE
            elif any(mt.startswith("image/") for mt in media_types):
                msg_type = MessageType.PHOTO

        event_obj = MessageEvent(
            source=source,
            text=text,
            message_type=msg_type,
            media_urls=media_urls,
            media_types=media_types,
            timestamp=timestamp,
            raw_message=wrapper,
        )

        await self.handle_message(event_obj)

    async def _fetch_file(self, file_id: Any, file_name: str) -> Optional[str]:
        """Ask the daemon to receive a file attachment and cache its bytes.

        Args:
            file_id: Daemon file identifier, sent in the ``/freceive`` command.
            file_name: File name reported by the daemon.  It is reduced to its
                final path component before being used on the local
                filesystem, because it comes from the remote sender.

        Returns:
            The path returned by the matching ``cache_*_from_bytes`` helper,
            or ``None`` when the name is unusable or the file is not found in
            any of the searched directories.
        """
        # Never let a remote-supplied name climb out of the search directory.
        safe_name = os.path.basename(str(file_name).replace("\\", "/"))
        if not safe_name or safe_name in (".", ".."):
            logger.warning("SimpleX: ignoring attachment with unusable name %r", file_name)
            return None

        # simplex-chat exposes `/api/v1/files/{fileId}` on an HTTP port
        # when started with --http-port.  However, the canonical WebSocket API
        # does not have a direct binary download command; files are stored on
        # the local filesystem after the daemon accepts them.
        #
        # We request acceptance first, then read from the daemon's local path.
        corr_id = self._make_corr_id()
        cmd = {
            "corrId": corr_id,
            "cmd": f"/freceive {file_id}",
        }
        await self._send_ws(cmd)
        # The daemon will emit a chatItemUpdated event when the file lands;
        # for simplicity we just wait briefly and rely on the daemon's default path.
        await asyncio.sleep(2)

        # simplex-chat stores received files in ~/Downloads or a configured path.
        # We try common locations.
        for search_dir in (
            os.path.expanduser("~/Downloads"),
            os.path.expanduser("~/.simplex/files"),
            "/tmp/simplex_files",
        ):
            candidate = os.path.join(search_dir, safe_name)
            if os.path.isfile(candidate):
                with open(candidate, "rb") as f:
                    data = f.read()
                ext = _guess_extension(data)
                if _is_image_ext(ext):
                    return cache_image_from_bytes(data, ext)
                elif _is_audio_ext(ext):
                    return cache_audio_from_bytes(data, ext)
                else:
                    return cache_document_from_bytes(data, safe_name)
        return None

    # ------------------------------------------------------------------
    # Outbound messages
    # ------------------------------------------------------------------

    def _make_corr_id(self) -> str:
        """Generate a correlation ID for a request and remember it as pending."""
        corr_id = f"{_CORR_PREFIX}{int(time.time() * 1000)}-{random.randint(0, 9999)}"
        self._pending_corr_ids.add(corr_id)
        if len(self._pending_corr_ids) > self._max_pending_corr:
            # Sets are unordered, so this drops an arbitrary half rather than
            # the oldest entries; it only bounds the set's size.
            to_remove = list(self._pending_corr_ids)[:self._max_pending_corr // 2]
            self._pending_corr_ids -= set(to_remove)
        return corr_id

    async def _send_ws(self, payload: dict) -> bool:
        """Send a JSON payload over the current WebSocket.

        Nothing is queued: when no socket is open, or the write fails, the
        payload is dropped and the failure is logged.

        Returns:
            ``True`` if the payload was written to the socket, else ``False``.
        """
        import websockets

        ws = self._ws
        if not ws:
            logger.debug("SimpleX: WS not connected, dropping outbound command")
            return False
        try:
            await ws.send(json.dumps(payload))
        except websockets.ConnectionClosed:
            logger.warning("SimpleX: WS closed while sending")
            return False
        except Exception as e:
            logger.warning("SimpleX: WS send error: %s", e)
            return False
        return True

    async def send(
        self,
        chat_id: str,
        content: str,
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send a text message to a contact or group.

        Args:
            chat_id: Contact ID, or ``group:<id>`` for a group.
            content: Message text.
            reply_to: Accepted for interface compatibility; unused.
            metadata: Accepted for interface compatibility; unused.

        Returns:
            A successful ``SendResult`` when the command was written to the
            socket, otherwise a failed one.
        """
        corr_id = self._make_corr_id()

        if chat_id.startswith("group:"):
            group_id = chat_id[6:]
            cmd_str = f"#[{group_id}] {content}"
        else:
            cmd_str = f"@[{chat_id}] {content}"

        payload = {
            "corrId": corr_id,
            "cmd": cmd_str,
        }

        if not await self._send_ws(payload):
            # NOTE(review): assumes SendResult accepts an ``error`` keyword —
            # confirm against gateway.platforms.base.
            return SendResult(
                success=False,
                error="SimpleX: WebSocket unavailable, message not sent",
            )
        return SendResult(success=True)

    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """SimpleX does not expose a typing indicator API — no-op."""
        pass

    async def send_image(
        self,
        chat_id: str,
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an image (URL) as a message with optional caption.

        SimpleX has no native ``send_image`` over the WebSocket API — file
        attachments require the daemon's filesystem-backed flow which is
        not driven from this adapter.  Fall back to a plain text message
        containing the URL and caption.
        """
        text = f"{caption}\n{image_url}".strip() if caption else image_url
        return await self.send(chat_id, text, reply_to=reply_to, metadata=metadata)

    async def get_chat_info(self, chat_id: str) -> dict:
        """Return basic chat info derived from ``chat_id`` alone."""
        if chat_id.startswith("group:"):
            return {"chat_id": chat_id, "type": "group", "name": chat_id[6:]}
        return {"chat_id": chat_id, "type": "dm", "name": chat_id}


# ---------------------------------------------------------------------------
# Plugin entry-point hooks
# ---------------------------------------------------------------------------
+ """ + if not os.getenv("SIMPLEX_WS_URL"): + return False + try: + import websockets # noqa: F401 + except ImportError: + return False + return True + + +def validate_config(config) -> bool: + """Validate that the platform config has enough info to connect.""" + extra = getattr(config, "extra", {}) or {} + ws_url = os.getenv("SIMPLEX_WS_URL") or extra.get("ws_url", "") + return bool(ws_url) + + +def is_connected(config) -> bool: + """Check whether SimpleX is configured (env or config.yaml).""" + extra = getattr(config, "extra", {}) or {} + ws_url = os.getenv("SIMPLEX_WS_URL") or extra.get("ws_url", "") + return bool(ws_url) + + +def _env_enablement() -> dict | None: + """Seed ``PlatformConfig.extra`` from env vars during gateway config load. + + Called by the platform registry's env-enablement hook BEFORE adapter + construction, so ``gateway status`` and ``get_connected_platforms()`` + reflect env-only configuration without instantiating the WebSocket + client. Returns ``None`` when SimpleX isn't minimally configured. + + The special ``home_channel`` key in the returned dict is handled by + the core hook โ€” it becomes a proper ``HomeChannel`` dataclass on the + ``PlatformConfig`` rather than being merged into ``extra``. + """ + ws_url = os.getenv("SIMPLEX_WS_URL", "").strip() + if not ws_url: + return None + seed: dict = {"ws_url": ws_url} + home = os.getenv("SIMPLEX_HOME_CHANNEL", "").strip() + if home: + seed["home_channel"] = { + "chat_id": home, + "name": os.getenv("SIMPLEX_HOME_CHANNEL_NAME", "").strip() or home, + } + return seed + + +async def _standalone_send( + pconfig, + chat_id: str, + message: str, + *, + thread_id: Optional[str] = None, + media_files: Optional[List[str]] = None, + force_document: bool = False, +) -> Dict[str, Any]: + """Open an ephemeral WebSocket to the daemon, send, and close. + + Used by ``tools/send_message_tool._send_via_adapter`` when the gateway + runner is not in this process (e.g. 
``hermes cron`` running as a + separate process from ``hermes gateway``). Without this hook, + ``deliver=simplex`` cron jobs fail with "No live adapter for platform". + + ``thread_id`` and ``force_document`` are accepted for signature parity + with other plugins but are not meaningful here. ``media_files`` is + accepted but only the text body is delivered โ€” SimpleX requires the + daemon's filesystem-backed file flow which an ephemeral connection + cannot drive safely. + """ + try: + import websockets as _wsclient + except ImportError: + return {"error": "websockets not installed. Run: pip install websockets"} + + extra = getattr(pconfig, "extra", {}) or {} + ws_url = os.getenv("SIMPLEX_WS_URL") or extra.get("ws_url", "ws://127.0.0.1:5225") + if not ws_url: + return {"error": "SimpleX standalone send: SIMPLEX_WS_URL is required"} + + try: + if chat_id.startswith("group:"): + group_id = chat_id[6:] + cmd_str = f"#[{group_id}] {message}" + else: + cmd_str = f"@[{chat_id}] {message}" + + payload = { + "corrId": f"hermes-snd-{int(time.time() * 1000)}", + "cmd": cmd_str, + } + + async with _wsclient.connect(ws_url, open_timeout=10, close_timeout=5) as ws: + await ws.send(json.dumps(payload)) + # Give the daemon a moment to process the command before closing. + await asyncio.sleep(0.5) + + return {"success": True, "platform": "simplex", "chat_id": chat_id} + except Exception as e: + return {"error": f"SimpleX send failed: {e}"} + + +def interactive_setup() -> None: + """Minimal stdin wizard for ``hermes setup gateway`` โ†’ SimpleX. + + Prompts for the WebSocket URL and the optional allowlist / home channel. + Writes to ``~/.hermes/.env`` via ``hermes_cli.config``. + """ + print() + print("SimpleX Chat setup") + print("------------------") + print("Requirements:") + print(" 1. simplex-chat daemon running (e.g. `simplex-chat -p 5225`).") + print(" 2. 
Python package `websockets` installed (`pip install websockets`).") + print() + + try: + from hermes_cli.config import get_env_value, save_env_value + except ImportError: + print("hermes_cli.config not available; set SIMPLEX_* vars manually in ~/.hermes/.env") + return + + def _prompt(var: str, prompt: str, *, secret: bool = False) -> None: + existing = get_env_value(var) if callable(get_env_value) else None + suffix = " [keep current]" if existing else "" + try: + if secret: + import getpass + value = getpass.getpass(f"{prompt}{suffix}: ") + else: + value = input(f"{prompt}{suffix}: ").strip() + except (EOFError, KeyboardInterrupt): + print() + return + if value: + save_env_value(var, value) + + _prompt("SIMPLEX_WS_URL", "Daemon WebSocket URL (default ws://127.0.0.1:5225)") + _prompt("SIMPLEX_ALLOWED_USERS", "Allowed contact IDs (comma-separated; blank=skip)") + _prompt("SIMPLEX_HOME_CHANNEL", "Home channel contact/group ID (or empty)") + print("Done. Make sure the simplex-chat daemon is running before starting the gateway.") + + +def register(ctx) -> None: + """Plugin entry point โ€” called by the Hermes plugin system at startup.""" + ctx.register_platform( + name="simplex", + label="SimpleX Chat", + adapter_factory=lambda cfg: SimplexAdapter(cfg), + check_fn=check_requirements, + validate_config=validate_config, + is_connected=is_connected, + required_env=["SIMPLEX_WS_URL"], + install_hint="pip install websockets # SimpleX adapter requires the websockets package", + setup_fn=interactive_setup, + # Env-driven auto-configuration: seeds PlatformConfig.extra so + # env-only setups show up in `hermes gateway status` without + # instantiating the adapter. + env_enablement_fn=_env_enablement, + # Cron home-channel delivery support โ€” `deliver=simplex` cron jobs + # route to SIMPLEX_HOME_CHANNEL when set. + cron_deliver_env_var="SIMPLEX_HOME_CHANNEL", + # Out-of-process cron delivery. 
Without this hook, deliver=simplex + # cron jobs fail with "No live adapter" when cron runs separately + # from the gateway. + standalone_sender_fn=_standalone_send, + # Auth env vars for _is_user_authorized() integration + allowed_users_env="SIMPLEX_ALLOWED_USERS", + allow_all_env="SIMPLEX_ALLOW_ALL_USERS", + # SimpleX has no hard line length; we still chunk for sanity. + max_message_length=MAX_MESSAGE_LENGTH, + # Display + emoji="๐Ÿ”’", + # SimpleX uses opaque contact IDs only โ€” no phone numbers or + # email addresses to redact. + pii_safe=True, + allow_update_command=True, + # LLM guidance + platform_hint=( + "You are chatting via SimpleX Chat, a private decentralised " + "messenger. Contacts are identified by opaque internal IDs, " + "not phone numbers or usernames. SimpleX supports standard " + "markdown formatting. There is no typing indicator and no " + "hard message length limit, but keep responses conversational." + ), + ) diff --git a/plugins/platforms/simplex/plugin.yaml b/plugins/platforms/simplex/plugin.yaml new file mode 100644 index 000000000000..2bb87641b631 --- /dev/null +++ b/plugins/platforms/simplex/plugin.yaml @@ -0,0 +1,37 @@ +name: simplex-platform +label: SimpleX Chat +kind: platform +version: 1.0.0 +description: > + SimpleX Chat gateway adapter for Hermes Agent. + Connects to a local simplex-chat daemon via WebSocket and relays + messages between SimpleX contacts/groups and the Hermes agent. + SimpleX is decentralised and assigns no persistent user IDs โ€” + every contact is an opaque internal ID generated at connection + time, making it one of the most private messengers available. +author: Mibayy +# ``requires_env`` and ``optional_env`` entries are surfaced in the +# ``hermes config`` UI via the platform-plugin env var injector in +# ``hermes_cli/config.py``. +requires_env: + - name: SIMPLEX_WS_URL + description: "WebSocket URL of the simplex-chat daemon (e.g. 
ws://127.0.0.1:5225)" + prompt: "SimpleX daemon WebSocket URL" + password: false +optional_env: + - name: SIMPLEX_ALLOWED_USERS + description: "Comma-separated SimpleX contact IDs allowed to talk to the bot" + prompt: "Allowed contact IDs (comma-separated)" + password: false + - name: SIMPLEX_ALLOW_ALL_USERS + description: "Allow any contact to talk to the bot (dev only โ€” disables allowlist)" + prompt: "Allow all contacts? (true/false)" + password: false + - name: SIMPLEX_HOME_CHANNEL + description: "Default contact/group ID for cron / notification delivery" + prompt: "Home channel contact/group ID (or empty)" + password: false + - name: SIMPLEX_HOME_CHANNEL_NAME + description: "Human label for the home channel (defaults to the ID)" + prompt: "Home channel display name (or empty)" + password: false diff --git a/providers/base.py b/providers/base.py index a9e76823bb2e..fa6765d103c2 100644 --- a/providers/base.py +++ b/providers/base.py @@ -21,6 +21,20 @@ OMIT_TEMPERATURE = object() +def _profile_user_agent() -> str: + """Return a ``hermes-cli/`` UA string, with a stable fallback. + + Used by ``ProviderProfile.fetch_models`` so the catalog probe is not + served the default ``Python-urllib/`` UA โ€” some providers + (OpenCode Zen, etc.) sit behind a WAF that returns 403 for that. + """ + try: + from hermes_cli import __version__ as _ver # lazy: avoid layer cycle at import time + return f"hermes-cli/{_ver}" + except Exception: + return "hermes-cli" + + @dataclass class ProviderProfile: """Base provider profile โ€” subclass or instantiate with overrides.""" @@ -153,6 +167,10 @@ def fetch_models( if api_key: req.add_header("Authorization", f"Bearer {api_key}") req.add_header("Accept", "application/json") + # Some providers (e.g. OpenCode Zen) sit behind a WAF that blocks + # the default ``Python-urllib/`` User-Agent. Set a generic + # hermes-cli UA so the catalog endpoint is reachable. 
+ req.add_header("User-Agent", _profile_user_agent()) for k, v in self.default_headers.items(): req.add_header(k, v) diff --git a/pyproject.toml b/pyproject.toml index a880bcb05bf8..ae2fff385a3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ daytona = ["daytona==0.155.0"] vercel = ["vercel==0.5.7"] hindsight = ["hindsight-client==0.6.1"] dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-xdist==3.8.0", "pytest-split==0.11.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10"] -messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"] +messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"] cron = [] # croniter is now a core dependency; this extra kept for back-compat slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"] matrix = ["mautrix[encryption]==0.21.0", "Markdown==3.10.2", "aiosqlite==0.22.1", "asyncpg==0.31.0", "aiohttp-socks==0.11.0"] @@ -166,14 +166,6 @@ youtube = [ ] # `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean. web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"] -rl = [ - "atroposlib @ git+https://github.com/NousResearch/atropos.git@c20c85256e5a45ad31edf8b7276e9c5ee1995a30", - "tinker @ git+https://github.com/thinking-machines-lab/tinker.git@30517b667f18a3dfb7ef33fb56cf686d5820ba2b", - "fastapi==0.133.1", - "uvicorn[standard]==0.41.0", - "wandb==0.25.1", -] -yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"] all = [ # Policy (2026-05-12): `[all]` includes only extras that genuinely # CAN'T be lazy-installed via `tools/lazy_deps.py` โ€” i.e. 
things every @@ -215,14 +207,15 @@ hermes-agent = "run_agent:main" hermes-acp = "acp_adapter.entry:main" [tool.setuptools] -py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"] +py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"] [tool.setuptools.package-data] hermes_cli = ["web_dist/**/*"] gateway = ["assets/**/*"] +acp_adapter = ["bootstrap/*.sh", "bootstrap/*.ps1"] [tool.setuptools.packages.find] -include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"] +include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "acp_adapter.*", "plugins", "plugins.*", "providers", "providers.*"] [tool.pytest.ini_options] testpaths = ["tests"] @@ -238,11 +231,7 @@ python-version = "3.13" unknown-argument = "warn" redundant-cast = "ignore" -[tool.ty.src] -exclude = ["tinker-atropos"] - [tool.ruff] -exclude = ["tinker-atropos"] preview = true # required for PLW1514 (unspecified-encoding) โ€” preview rule [tool.ruff.lint] diff --git a/rl_cli.py b/rl_cli.py deleted file mode 100644 index e3996a29df69..000000000000 --- a/rl_cli.py +++ /dev/null @@ -1,446 +0,0 @@ -#!/usr/bin/env python3 -""" -RL Training CLI Runner - -Dedicated CLI runner for RL training workflows with: -- Extended timeouts for long-running training -- RL-focused system prompts -- Full toolset including RL training tools -- Special handling for 30-minute check intervals - -Usage: - python rl_cli.py "Train a model on GSM8k for math reasoning" - python 
rl_cli.py --interactive - python rl_cli.py --list-environments - -Environment Variables: - TINKER_API_KEY: API key for Tinker service (required) - WANDB_API_KEY: API key for WandB metrics (required) - OPENROUTER_API_KEY: API key for OpenRouter (required for agent) -""" - -import asyncio -import os -import sys -from pathlib import Path - -import fire -import yaml - -from hermes_constants import OPENROUTER_BASE_URL, get_hermes_home - -# Load .env from ~/.hermes/.env first, then project root as dev fallback. -# User-managed env files should override stale shell exports on restart. -_hermes_home = get_hermes_home() -_project_env = Path(__file__).parent / '.env' - -from hermes_cli.env_loader import load_hermes_dotenv - -_loaded_env_paths = load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env) -for _env_path in _loaded_env_paths: - print(f"โœ… Loaded environment variables from {_env_path}") - -# Set terminal working directory to tinker-atropos submodule -# This ensures terminal commands run in the right context for RL work -tinker_atropos_dir = Path(__file__).parent / 'tinker-atropos' -if tinker_atropos_dir.exists(): - os.environ['TERMINAL_CWD'] = str(tinker_atropos_dir) - os.environ['HERMES_QUIET'] = '1' # Disable temp subdirectory creation - print(f"๐Ÿ“‚ Terminal working directory: {tinker_atropos_dir}") -else: - # Fall back to hermes-agent directory if submodule not found - os.environ['TERMINAL_CWD'] = str(Path(__file__).parent) - os.environ['HERMES_QUIET'] = '1' - print(f"โš ๏ธ tinker-atropos submodule not found, using: {Path(__file__).parent}") - -# Import agent and tools -from run_agent import AIAgent -from tools.rl_training_tool import get_missing_keys - - -# ============================================================================ -# Config Loading -# ============================================================================ - -DEFAULT_MODEL = "anthropic/claude-opus-4.5" -DEFAULT_BASE_URL = OPENROUTER_BASE_URL - - -def load_hermes_config() 
-> dict: - """ - Load configuration from ~/.hermes/config.yaml. - - Returns: - dict: Configuration with model, base_url, etc. - """ - config_path = _hermes_home / 'config.yaml' - - config = { - "model": DEFAULT_MODEL, - "base_url": DEFAULT_BASE_URL, - } - - if config_path.exists(): - try: - with open(config_path, "r", encoding='utf-8') as f: - file_config = yaml.safe_load(f) or {} - - # Get model from config - if "model" in file_config: - if isinstance(file_config["model"], str): - config["model"] = file_config["model"] - elif isinstance(file_config["model"], dict): - config["model"] = file_config["model"].get("default", DEFAULT_MODEL) - - # Get base_url if specified - if "base_url" in file_config: - config["base_url"] = file_config["base_url"] - - except Exception as e: - print(f"โš ๏ธ Warning: Failed to load config.yaml: {e}") - - return config - - -# ============================================================================ -# RL-Specific Configuration -# ============================================================================ - -# Extended timeouts for long-running RL operations -RL_MAX_ITERATIONS = 200 # Allow many more iterations for long workflows - -# RL-focused system prompt -RL_SYSTEM_PROMPT = """You are an automated post-training engineer specializing in reinforcement learning for language models. - -## Your Capabilities - -You have access to RL training tools for running reinforcement learning on models through Tinker-Atropos: - -1. **DISCOVER**: Use `rl_list_environments` to see available RL environments -2. **INSPECT**: Read environment files to understand how they work (verifiers, data loading, rewards) -3. **INSPECT DATA**: Use terminal to explore HuggingFace datasets and understand their format -4. **CREATE**: Copy existing environments as templates, modify for your needs -5. **CONFIGURE**: Use `rl_select_environment` and `rl_edit_config` to set up training -6. 
**TEST**: Always use `rl_test_inference` before full training to validate your setup -7. **TRAIN**: Use `rl_start_training` to begin, `rl_check_status` to monitor -8. **EVALUATE**: Use `rl_get_results` and analyze WandB metrics to assess performance - -## Environment Files - -Environment files are located in: `tinker-atropos/tinker_atropos/environments/` - -Study existing environments to learn patterns. Look for: -- `load_dataset()` calls - how data is loaded -- `score_answer()` / `score()` - verification logic -- `get_next_item()` - prompt formatting -- `system_prompt` - instruction format -- `config_init()` - default configuration - -## Creating New Environments - -To create a new environment: -1. Read an existing environment file (e.g., gsm8k_tinker.py) -2. Use terminal to explore the target dataset format -3. Copy the environment file as a template -4. Modify the dataset loading, prompt formatting, and verifier logic -5. Test with `rl_test_inference` before training - -## Important Guidelines - -- **Always test before training**: Training runs take hours - verify everything works first -- **Monitor metrics**: Check WandB for reward/mean and percent_correct -- **Status check intervals**: Wait at least 30 minutes between status checks -- **Early stopping**: Stop training early if metrics look bad or stagnant -- **Iterate quickly**: Start with small total_steps to validate, then scale up - -## Available Toolsets - -You have access to: -- **RL tools**: Environment discovery, config management, training, testing -- **Terminal**: Run commands, inspect files, explore datasets -- **Web**: Search for information, documentation, papers -- **File tools**: Read and modify code files - -When asked to train a model, follow this workflow: -1. List available environments -2. Select and configure the appropriate environment -3. Test with sample prompts -4. Start training with conservative settings -5. 
Monitor progress and adjust as needed -""" - -# Toolsets to enable for RL workflows -RL_TOOLSETS = ["terminal", "web", "rl"] - - -# ============================================================================ -# Helper Functions -# ============================================================================ - -def check_requirements(): - """Check that all required environment variables and services are available.""" - errors = [] - - # Check API keys - if not os.getenv("OPENROUTER_API_KEY"): - errors.append("OPENROUTER_API_KEY not set - required for agent") - - missing_rl_keys = get_missing_keys() - if missing_rl_keys: - errors.append(f"Missing RL API keys: {', '.join(missing_rl_keys)}") - - if errors: - print("โŒ Missing requirements:") - for error in errors: - print(f" - {error}") - print("\nPlease set these environment variables in your .env file or shell.") - return False - - return True - - -def check_tinker_atropos(): - """Check if tinker-atropos submodule is properly set up.""" - tinker_path = Path(__file__).parent / "tinker-atropos" - - if not tinker_path.exists(): - return False, "tinker-atropos submodule not found. 
Run: git submodule update --init" - - envs_path = tinker_path / "tinker_atropos" / "environments" - if not envs_path.exists(): - return False, f"environments directory not found at {envs_path}" - - env_files = list(envs_path.glob("*.py")) - env_files = [f for f in env_files if not f.name.startswith("_")] - - return True, {"path": str(tinker_path), "environments_count": len(env_files)} - - -def list_environments_sync(): - """List available environments (synchronous wrapper).""" - from tools.rl_training_tool import rl_list_environments - import json - - async def _list(): - result = await rl_list_environments() - return json.loads(result) - - return asyncio.run(_list()) - - -# ============================================================================ -# Main CLI -# ============================================================================ - -def main( - task: str = None, - model: str = None, - api_key: str = None, - base_url: str = None, - max_iterations: int = RL_MAX_ITERATIONS, - interactive: bool = False, - list_environments: bool = False, - check_server: bool = False, - verbose: bool = False, - save_trajectories: bool = True, -): - """ - RL Training CLI - Dedicated runner for RL training workflows. 
- - Args: - task: The training task/goal (e.g., "Train a model on GSM8k for math") - model: Model to use for the agent (reads from ~/.hermes/config.yaml if not provided) - api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided) - base_url: API base URL (reads from config or defaults to OpenRouter) - max_iterations: Maximum agent iterations (default: 200 for long workflows) - interactive: Run in interactive mode (multiple conversations) - list_environments: Just list available RL environments and exit - check_server: Check if RL API server is running and exit - verbose: Enable verbose logging - save_trajectories: Save conversation trajectories (default: True for RL) - - Examples: - # Train on a specific environment - python rl_cli.py "Train a model on GSM8k math problems" - - # Interactive mode - python rl_cli.py --interactive - - # List available environments - python rl_cli.py --list-environments - - # Check server status - python rl_cli.py --check-server - """ - # Load config from ~/.hermes/config.yaml - config = load_hermes_config() - - # Use config values if not explicitly provided - if model is None: - model = config["model"] - if base_url is None: - base_url = config["base_url"] - - print("๐ŸŽฏ RL Training Agent") - print("=" * 60) - - # Handle setup check - if check_server: - print("\n๐Ÿ” Checking tinker-atropos setup...") - ok, result = check_tinker_atropos() - if ok: - print("โœ… tinker-atropos submodule found") - print(f" Path: {result.get('path')}") - print(f" Environments found: {result.get('environments_count', 0)}") - - # Also check API keys - missing = get_missing_keys() - if missing: - print(f"\nโš ๏ธ Missing API keys: {', '.join(missing)}") - print(" Add them to ~/.hermes/.env") - else: - print("โœ… API keys configured") - else: - print(f"โŒ tinker-atropos not set up: {result}") - print("\nTo set up:") - print(" git submodule update --init") - print(" pip install -e ./tinker-atropos") - return - - # Handle environment 
listing - if list_environments: - print("\n๐Ÿ“‹ Available RL Environments:") - print("-" * 40) - try: - data = list_environments_sync() - if "error" in data: - print(f"โŒ Error: {data['error']}") - return - - envs = data.get("environments", []) - if not envs: - print("No environments found.") - print("\nMake sure tinker-atropos is set up:") - print(" git submodule update --init") - return - - for env in envs: - print(f"\n ๐Ÿ“ฆ {env['name']}") - print(f" Class: {env['class_name']}") - print(f" Path: {env['file_path']}") - if env.get('description'): - desc = env['description'][:100] + "..." if len(env.get('description', '')) > 100 else env.get('description', '') - print(f" Description: {desc}") - - print(f"\n๐Ÿ“Š Total: {len(envs)} environments") - print("\nUse `rl_select_environment(name)` to select an environment for training.") - except Exception as e: - print(f"โŒ Error listing environments: {e}") - print("\nMake sure tinker-atropos is set up:") - print(" git submodule update --init") - print(" pip install -e ./tinker-atropos") - return - - # Check requirements - if not check_requirements(): - sys.exit(1) - - # Set default task if none provided - if not task and not interactive: - print("\nโš ๏ธ No task provided. Use --interactive for interactive mode or provide a task.") - print("\nExamples:") - print(' python rl_cli.py "Train a model on GSM8k math problems"') - print(' python rl_cli.py "Create an RL environment for code generation"') - print(' python rl_cli.py --interactive') - return - - # Get API key - api_key = api_key or os.getenv("OPENROUTER_API_KEY") - if not api_key: - print("โŒ No API key provided. 
Set OPENROUTER_API_KEY or pass --api-key") - sys.exit(1) - - print(f"\n๐Ÿค– Model: {model}") - print(f"๐Ÿ”ง Max iterations: {max_iterations}") - print(f"๐Ÿ“ Toolsets: {', '.join(RL_TOOLSETS)}") - print("=" * 60) - - # Create agent with RL configuration - agent = AIAgent( - base_url=base_url, - api_key=api_key, - model=model, - max_iterations=max_iterations, - enabled_toolsets=RL_TOOLSETS, - save_trajectories=save_trajectories, - verbose_logging=verbose, - quiet_mode=False, - ephemeral_system_prompt=RL_SYSTEM_PROMPT, - ) - - if interactive: - # Interactive mode - multiple conversations - print("\n๐Ÿ”„ Interactive RL Training Mode") - print("Type 'quit' or 'exit' to end the session.") - print("Type 'status' to check active training runs.") - print("-" * 40) - - while True: - try: - user_input = input("\n๐ŸŽฏ RL Task> ").strip() - - if not user_input: - continue - - if user_input.lower() in {'quit', 'exit', 'q'}: - print("\n๐Ÿ‘‹ Goodbye!") - break - - if user_input.lower() == 'status': - # Quick status check - from tools.rl_training_tool import rl_list_runs - import json - result = asyncio.run(rl_list_runs()) - runs = json.loads(result) - if isinstance(runs, list) and runs: - print("\n๐Ÿ“Š Active Runs:") - for run in runs: - print(f" - {run['run_id']}: {run['environment']} ({run['status']})") - else: - print("\nNo active runs.") - continue - - # Run the agent - print("\n" + "=" * 60) - agent.run_conversation(user_input) - print("\n" + "=" * 60) - - except KeyboardInterrupt: - print("\n\n๐Ÿ‘‹ Interrupted. 
Goodbye!") - break - except Exception as e: - print(f"\nโŒ Error: {e}") - if verbose: - import traceback - traceback.print_exc() - else: - # Single task mode - print(f"\n๐Ÿ“ Task: {task}") - print("-" * 40) - - try: - agent.run_conversation(task) - print("\n" + "=" * 60) - print("โœ… Task completed") - except KeyboardInterrupt: - print("\n\nโš ๏ธ Interrupted by user") - except Exception as e: - print(f"\nโŒ Error: {e}") - if verbose: - import traceback - traceback.print_exc() - sys.exit(1) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/run_agent.py b/run_agent.py index 906f706d08a3..a4df87497772 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4268,6 +4268,7 @@ def _bg_review_auto_deny(command, description, **kwargs): except Exception: pass review_agent = None + review_messages = [] try: with open(os.devnull, "w", encoding="utf-8") as _devnull, \ contextlib.redirect_stdout(_devnull), \ @@ -4385,6 +4386,7 @@ def _bg_review_auto_deny(command, description, **kwargs): review_agent.close() except Exception: pass + review_messages = list(getattr(review_agent, "_session_messages", [])) review_agent = None # Scan the review agent's messages for successful tool actions @@ -4394,7 +4396,7 @@ def _bg_review_auto_deny(command, description, **kwargs): # re-surface stale "created"/"updated" messages from the prior # conversation as if they just happened (issue #14944). 
actions = self._summarize_background_review_actions( - getattr(review_agent, "_session_messages", []), + review_messages, messages_snapshot, ) @@ -12205,7 +12207,7 @@ def run_conversation( codex_ack_continuations = 0 length_continue_retries = 0 truncated_tool_call_retries = 0 - truncated_response_prefix = "" + truncated_response_parts: List[str] = [] compression_attempts = 0 _turn_exit_reason = "unknown" # Diagnostic: why the loop ended @@ -12666,16 +12668,30 @@ def run_conversation( try: from hermes_cli.plugins import invoke_hook as _invoke_hook + request_messages = api_kwargs.get("messages") + if not isinstance(request_messages, list): + request_messages = api_kwargs.get("input") + if not isinstance(request_messages, list): + request_messages = api_messages + # Shallow-copy the outer list so plugins that retain the + # reference for async snapshotting don't observe later + # mutations of api_messages. The inner dicts are not + # mutated by the agent loop, so a shallow copy is + # sufficient; a deepcopy would walk every tool result + # and base64 image on every API call. 
_invoke_hook( "pre_api_request", task_id=effective_task_id, session_id=self.session_id or "", + user_message=original_user_message, + conversation_history=list(messages), platform=self.platform or "", model=self.model, provider=self.provider, base_url=self.base_url, api_mode=self.api_mode, api_call_count=api_call_count, + request_messages=list(request_messages) if isinstance(request_messages, list) else [], message_count=len(api_messages), tool_count=len(self.tools or []), approx_input_tokens=approx_tokens, @@ -13098,7 +13114,7 @@ def _stop_spinner(): interim_msg = self._build_assistant_message(assistant_message, finish_reason) messages.append(interim_msg) if assistant_message.content: - truncated_response_prefix += assistant_message.content + truncated_response_parts.append(assistant_message.content) if length_continue_retries < 3: self._vprint( @@ -13119,7 +13135,7 @@ def _stop_spinner(): restart_with_length_continuation = True break - partial_response = self._strip_think_blocks(truncated_response_prefix).strip() + partial_response = self._strip_think_blocks("".join(truncated_response_parts)).strip() self._cleanup_task_resources(effective_task_id) self._persist_session(messages, conversation_history) return { @@ -14580,7 +14596,9 @@ def _stop_spinner(): finish_reason=finish_reason, message_count=len(api_messages), response_model=getattr(response, "model", None), + response=response, usage=self._usage_summary_for_api_request_hook(response), + assistant_message=assistant_message, assistant_content_chars=len(_assistant_text), assistant_tool_call_count=len(_assistant_tool_calls), ) @@ -15323,9 +15341,9 @@ def _stop_spinner(): codex_ack_continuations = 0 - if truncated_response_prefix: - final_response = truncated_response_prefix + final_response - truncated_response_prefix = "" + if truncated_response_parts: + final_response = "".join(truncated_response_parts) + final_response + truncated_response_parts = [] length_continue_retries = 0 final_response = 
self._strip_think_blocks(final_response).strip() diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 36cdf76ec70b..2cf81969beb5 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -958,20 +958,6 @@ except Exception: } } - # tinker-atropos (RL training) is optional and OFF by default. Matches the - # Linux/macOS install.sh behavior. Reasons not to auto-install: - # - tinker-atropos/pyproject.toml pulls atroposlib + tinker from git+https - # (NousResearch/atropos + thinking-machines-lab/tinker) which can fail on - # locked-down networks, flaky DNS, or rate-limited github.com and would - # previously kill the whole install mid-flight on Windows. - # - It's an RL training submodule, not part of the default agent surface. - # Users who don't do RL training never need it. - # Users who do want it can run the one-liner we print below. - if (Test-Path "tinker-atropos\pyproject.toml") { - Write-Info "tinker-atropos submodule found โ€” skipping install (optional, for RL training)" - Write-Info " To install later: $UvCmd pip install -e `".\tinker-atropos`"" - } - Pop-Location Write-Success "All dependencies installed" diff --git a/scripts/install.sh b/scripts/install.sh index cf24912cc519..9c5db6b1c080 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -1051,11 +1051,6 @@ install_deps() { log_info "Termux note: matrix e2ee and local faster-whisper extras are excluded from .[termux-all] due to upstream Android wheel/toolchain blockers." log_info "Termux note: browser/WhatsApp tooling is not installed by default; see the Termux guide for optional follow-up steps." 
- if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then - log_info "tinker-atropos submodule found โ€” skipping install (optional, for RL training)" - log_info " To install later: $PIP_PYTHON -m pip install -e \"./tinker-atropos\"" - fi - log_success "All dependencies installed" return 0 fi @@ -1243,13 +1238,6 @@ PY log_success "Main package installed" - # tinker-atropos (RL training) is optional โ€” skip by default. - # To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos" - if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then - log_info "tinker-atropos submodule found โ€” skipping install (optional, for RL training)" - log_info " To install: $UV_CMD pip install -e \"./tinker-atropos\"" - fi - log_success "All dependencies installed" } diff --git a/scripts/release.py b/scripts/release.py index a681daa49dec..f3df43c3fe1d 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -21,6 +21,7 @@ """ import argparse +import json import re import shutil import subprocess @@ -33,6 +34,11 @@ VERSION_FILE = REPO_ROOT / "hermes_cli" / "__init__.py" PYPROJECT_FILE = REPO_ROOT / "pyproject.toml" +# ACP Registry manifest must stay version-locked with pyproject.toml. +# tests/acp/test_registry_manifest.py enforces this lockstep so the release +# bump touches both files atomically. 
+ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json" + # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ # Git email โ†’ GitHub username mapping # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ @@ -43,6 +49,7 @@ "teknium1@gmail.com": "teknium1", "30366221+WorldWriter@users.noreply.github.com": "WorldWriter", "dafeng@DafengdeMacBook-Pro.local": "WorldWriter", + "anadi.jaggia@gmail.com": "Jaggia", "32201324+simpolism@users.noreply.github.com": "simpolism", "simpolism@gmail.com": "simpolism", "jake@nousresearch.com": "simpolism", @@ -51,23 +58,32 @@ "altriatree@gmail.com": "TruaShamu", "m@mobrienv.dev": "mikeyobrien", "qiyin.zuo@pcitc.com": "qiyin-code", + "mr.aashiz@gmail.com": "aashizpoudel", + "nidhi2894@gmail.com": "nidhi-singh02", + "30312689+aashizpoudel@users.noreply.github.com": "aashizpoudel", "oleksii.lisikh@gmail.com": "olisikh", + "jeremy@geocaching.com": "outdoorsea", "leone.parise@gmail.com": "leoneparise", "mr@shu.io": "mrshu", + "adam.manning@gmail.com": "am423", "buraysandro9@gmail.com": "ygd58", + "108427749+buntingszn@users.noreply.github.com": "buntingszn", "yanglongwei06@gmail.com": "Alex-yang00", "teknium@nousresearch.com": "teknium1", "piyushvp1@gmail.com": "thelumiereguy", "421774554@qq.com": "wuli666", "harish.kukreja@gmail.com": "counterposition", + "korkyzer@gmail.com": "Korkyzer", "1046611633@qq.com": "zhengyn0001", "1095245867@qq.com": "littlewwwhite", "db@project-aeon.com": "db-aeon", "ahmed@abadr.net": "ahmedbadr3", + "63822243+CoinTheHat@users.noreply.github.com": "CoinTheHat", "cleo@edaphic.xyz": "curiouscleo", "hirokazu.ogawa@kwansei.ac.jp": "hrkzogw", "datapod.k@gmail.com": "dandacompany", 
"treydong.zh@gmail.com": "TreyDong", + "phil.thomas@gametime.co": "explainanalyze", "kyanam.preetham@gmail.com": "pkyanam", "zhizhong.xu@shopee.com": "1000Delta", "30397170+1000Delta@users.noreply.github.com": "1000Delta", @@ -76,6 +92,8 @@ "zhanganzhe@tenclass.com": "luoyuctl", "51604064+luoyuctl@users.noreply.github.com": "luoyuctl", "127238744+teknium1@users.noreply.github.com": "teknium1", + "tolle.lege+github@gmail.com": "InB4DevOps", + "73686890+InB4DevOps@users.noreply.github.com": "InB4DevOps", "147827411+EloquentBrush@users.noreply.github.com": "AhmetArif0", "97489706+purzbeats@users.noreply.github.com": "purzbeats", "hugosequier@gmail.com": "Hugo-SEQUIER", @@ -98,6 +116,7 @@ "oswaldb22@users.noreply.github.com": "oswaldb22", "abdielv@proton.me": "AJV20", "mason@growagainorchids.com": "masonjames", + "108541149+amethystani@users.noreply.github.com": "amethystani", "ytchen0719@gmail.com": "liquidchen", "am@studio1.tailb672fe.ts.net": "subtract0", "mike@grossmann.at": "ReqX", @@ -204,6 +223,7 @@ "74749461+yuga-hashimoto@users.noreply.github.com": "yuga-hashimoto", "xiangyong@zspace.cn": "CES4751", "harish.kukreja@gmail.com": "counterposition", + "nidhi2894@gmail.com": "nidhi-singh02", "35294173+Fearvox@users.noreply.github.com": "Fearvox", "hypnus.yuan@gmail.com": "Hypnus-Yuan", "15558128926@qq.com": "xsfX20", @@ -245,6 +265,7 @@ "yuxiangl490@gmail.com": "y0shua1ee", "manmit0x@gmail.com": "0xDevNinja", "stevekelly622@gmail.com": "steezkelly", + "brian@dralth.com": "btorresgil", "momowind@gmail.com": "momowind", "clockwork-codex@users.noreply.github.com": "misery-hl", "207811921+misery-hl@users.noreply.github.com": "misery-hl", @@ -385,6 +406,7 @@ "Mibayy@users.noreply.github.com": "Mibayy", "mibayy@users.noreply.github.com": "Mibayy", "mibay@clawhub.io": "Mibayy", + "louismichalot@hotmail.com": "Mibayy", "135070653+sgaofen@users.noreply.github.com": "sgaofen", "lzy.dev@gmail.com": "zhiyanliu", "me@janstepanovsky.cz": "hhhonzik", @@ -626,6 +648,7 @@ 
"skmishra1991@gmail.com": "bugkill3r", "karamusti912@gmail.com": "MustafaKara7", "kira@ariaki.me": "kira-ariaki", + "kira.ops@proton.me": "KiraKatana", "knopki@duck.com": "knopki", "limars874@gmail.com": "limars874", "lisicheng168@gmail.com": "lesterli", @@ -767,6 +790,8 @@ "chayton@sina.com": "ycbai", "longsizhuo@gmail.com": "longsizhuo", "chenb19870707@gmail.com": "ms-alan", + "agorgianitisj@hotmail.com": "johnisag", + "phil.thomas@gametime.co": "explainanalyze", "276886827+WuTianyi123@users.noreply.github.com": "WuTianyi123", "22549957+li0near@users.noreply.github.com": "li0near", "guoyu801@gmail.com": "li0near", @@ -865,6 +890,8 @@ "dpaluy@users.noreply.github.com": "dpaluy", "psikonetik@gmail.com": "el-analista", "chenb19870707@gmail.com": "ms-alan", + "agorgianitisj@hotmail.com": "johnisag", + "phil.thomas@gametime.co": "explainanalyze", "hex-clawd@users.noreply.github.com": "hex-clawd", "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", "barteq@hacknotes.local": "barteqpl", @@ -1045,6 +1072,8 @@ "37467487+yifengingit@users.noreply.github.com": "yifengingit", # PR #25589 salvage (AUTOINCREMENT id ordering) "89525629+vanthinh6886@users.noreply.github.com": "vanthinh6886", # PR #25562 salvage (.env 0600 perms) "16034932+Arkmusn@users.noreply.github.com": "Arkmusn", # PR #25559 salvage (approvals.timeout from config) + "nidhi2894@gmail.com": "nidhi-singh02", # PR #2752 salvage (slack whitespace-only IndexError guard) + "38173192+nidhi-singh02@users.noreply.github.com": "nidhi-singh02", } @@ -1146,19 +1175,48 @@ def update_version_files(semver: str, calver_date: str): ) PYPROJECT_FILE.write_text(pyproject) + # Update ACP Registry manifest + npm launcher (must stay version-locked + # with pyproject โ€” enforced by tests/acp/test_registry_manifest.py). + _update_acp_registry_versions(semver) + + +def _update_acp_registry_versions(semver: str) -> None: + """Bump the ACP Registry manifest's version + uvx package pin in lockstep + with pyproject. 
+ + Skips silently if the manifest is missing โ€” older release branches predate + the ACP Registry assets. + """ + if ACP_REGISTRY_MANIFEST.exists(): + manifest = json.loads(ACP_REGISTRY_MANIFEST.read_text(encoding="utf-8")) + manifest["version"] = semver + uvx = manifest.get("distribution", {}).get("uvx", {}) + if "package" in uvx: + uvx["package"] = f"hermes-agent[acp]=={semver}" + # Preserve trailing newline + 2-space indent the file already uses. + ACP_REGISTRY_MANIFEST.write_text( + json.dumps(manifest, indent=2) + "\n", encoding="utf-8" + ) + def build_release_artifacts(semver: str) -> list[Path]: """Build sdist/wheel artifacts for the current release. - Returns the artifact paths when the local environment has ``python -m build`` - available. If build tooling is missing or the build fails, returns an empty - list and lets the release proceed without attached Python artifacts. + Tries ``uv build`` first (matching the CI workflow), falls back to + ``python -m build`` if uv is unavailable. """ dist_dir = REPO_ROOT / "dist" shutil.rmtree(dist_dir, ignore_errors=True) + # Prefer uv build (matches CI workflow), fall back to python -m build. 
+ uv_bin = shutil.which("uv") + if uv_bin: + cmd = [uv_bin, "build", "--sdist", "--wheel"] + else: + cmd = [sys.executable, "-m", "build", "--sdist", "--wheel"] + result = subprocess.run( - [sys.executable, "-m", "build", "--sdist", "--wheel"], + cmd, cwd=str(REPO_ROOT), capture_output=True, text=True, @@ -1171,7 +1229,7 @@ def build_release_artifacts(semver: str) -> list[Path]: print(f" {stderr.splitlines()[-1]}") elif stdout: print(f" {stdout.splitlines()[-1]}") - print(" Install the 'build' package to attach semver-named sdist/wheel assets.") + print(" Install uv or the 'build' package to attach sdist/wheel assets.") return [] artifacts = sorted(p for p in dist_dir.iterdir() if p.is_file()) @@ -1278,11 +1336,11 @@ def get_commits(since_tag=None): else: range_spec = "HEAD" - # Format: hash|author_name|author_email|subject\0body - # Using %x00 (null) as separator between subject and body + # Format: hashauthor_nameauthor_emailsubject\0body + # Using %x1f (unit separator) to avoid conflict with | in author names log = git( "log", range_spec, - "--format=%H|%an|%ae|%s%x00%b%x00", + "--format=%H%x1f%an%x1f%ae%x1f%s%x00%b%x00", "--no-merges", ) @@ -1296,14 +1354,14 @@ def get_commits(since_tag=None): entry = entry.strip() if not entry: continue - # Split on first null to separate "hash|name|email|subject" from "body" + # Split on first null to separate "hashnameemailsubject" from "body" if "\0" in entry: header, body = entry.split("\0", 1) body = body.strip() else: header = entry body = "" - parts = header.split("|", 3) + parts = header.split("\x1f", 3) if len(parts) != 4: continue sha, name, email, subject = parts @@ -1323,7 +1381,7 @@ def get_commits(since_tag=None): return commits -def get_pr_number(subject: str) -> str: +def get_pr_number(subject: str) -> str | None: """Extract PR number from commit subject if present.""" match = re.search(r"#(\d+)", subject) if match: @@ -1474,6 +1532,7 @@ def main(): print("No previous tags found. 
Use --first-release for the initial release.") print(f"Would create tag: {tag_name}") print(f"Would set version: {new_version}") + return # Get commits commits = get_commits(since_tag=prev_tag) @@ -1518,7 +1577,10 @@ def main(): print(f" โœ“ Updated version files to v{new_version} ({calver_date})") # Commit version bump - add_result = git_result("add", str(VERSION_FILE), str(PYPROJECT_FILE)) + add_files = [str(VERSION_FILE), str(PYPROJECT_FILE)] + if ACP_REGISTRY_MANIFEST.exists(): + add_files.append(str(ACP_REGISTRY_MANIFEST)) + add_result = git_result("add", *add_files) if add_result.returncode != 0: print(f" โœ— Failed to stage version files: {add_result.stderr.strip()}") return @@ -1560,7 +1622,7 @@ def main(): # Create GitHub release changelog_file = REPO_ROOT / ".release_notes.md" - changelog_file.write_text(changelog) + changelog_file.write_text(changelog, encoding="utf-8") gh_cmd = [ "gh", "release", "create", tag_name, diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 9ff64471e566..5723d8b543b8 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -57,11 +57,28 @@ const REPLY_PREFIX = process.env.WHATSAPP_REPLY_PREFIX === undefined : process.env.WHATSAPP_REPLY_PREFIX.replace(/\\n/g, '\n'); const MAX_MESSAGE_LENGTH = parseInt(process.env.WHATSAPP_MAX_MESSAGE_LENGTH || '4096', 10); const CHUNK_DELAY_MS = parseInt(process.env.WHATSAPP_CHUNK_DELAY_MS || '300', 10); +// Per-call timeout for sock.sendMessage(). Baileys occasionally hangs forever +// when uploading media to WhatsApp servers (and, less often, on text sends), +// which pins the bridge's HTTP handler until the upstream aiohttp timeout +// fires. Fail fast instead so the gateway can surface a real error and retry. 
+const SEND_TIMEOUT_MS = parseInt(process.env.WHATSAPP_SEND_TIMEOUT_MS || '60000', 10); function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } +function sendWithTimeout(chatId, payload, timeoutMs = SEND_TIMEOUT_MS) { + let timer; + const timeoutPromise = new Promise((_, reject) => { + timer = setTimeout( + () => reject(new Error(`sendMessage timed out after ${timeoutMs / 1000}s`)), + timeoutMs, + ); + }); + return Promise.race([sock.sendMessage(chatId, payload), timeoutPromise]) + .finally(() => clearTimeout(timer)); +} + function formatOutgoingMessage(message) { // In bot mode, messages come from a different number so the prefix is // redundant โ€” the sender identity is already clear. Only prepend in @@ -487,7 +504,7 @@ app.post('/send', async (req, res) => { const chunks = splitLongMessage(formatOutgoingMessage(message)); const messageIds = []; for (let i = 0; i < chunks.length; i += 1) { - const sent = await sock.sendMessage(chatId, { text: chunks[i] }); + const sent = await sendWithTimeout(chatId, { text: chunks[i] }); trackSentMessageId(sent); if (sent?.key?.id) messageIds.push(sent.key.id); if (chunks.length > 1 && i < chunks.length - 1) { @@ -521,10 +538,10 @@ app.post('/edit', async (req, res) => { const chunks = splitLongMessage(formatOutgoingMessage(message)); const messageIds = []; - await sock.sendMessage(chatId, { text: chunks[0], edit: key }); + await sendWithTimeout(chatId, { text: chunks[0], edit: key }); if (chunks.length > 1) { for (let i = 1; i < chunks.length; i += 1) { - const sent = await sock.sendMessage(chatId, { text: chunks[i] }); + const sent = await sendWithTimeout(chatId, { text: chunks[i] }); trackSentMessageId(sent); if (sent?.key?.id) messageIds.push(sent.key.id); if (i < chunks.length - 1) { @@ -625,7 +642,7 @@ app.post('/send-media', async (req, res) => { break; } - const sent = await sock.sendMessage(chatId, msgPayload); + const sent = await sendWithTimeout(chatId, msgPayload); trackSentMessageId(sent); 
diff --git a/setup-hermes.sh b/setup-hermes.sh index 2aa773c1c9c6..bdb8c1e96535 100755 --- a/setup-hermes.sh +++ b/setup-hermes.sh @@ -267,22 +267,6 @@ else fi # ============================================================================ -# Submodules (terminal backend + RL training) -# ============================================================================ - -echo -e "${CYAN}โ†’${NC} Installing optional submodules..." - -# tinker-atropos (RL training backend) -if is_termux; then - echo -e "${CYAN}โ†’${NC} Skipping tinker-atropos on Termux (not part of the tested Android path)" -elif [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then - $UV_CMD pip install -e "./tinker-atropos" && \ - echo -e "${GREEN}โœ“${NC} tinker-atropos installed" || \ - echo -e "${YELLOW}โš ${NC} tinker-atropos install failed (RL tools may not work)" -else - echo -e "${YELLOW}โš ${NC} tinker-atropos not found (run: git submodule update --init --recursive)" -fi - # ============================================================================ # Optional: ripgrep (for faster file search) # ============================================================================ diff --git a/tests/acp/test_auth.py b/tests/acp/test_auth.py index ffb07463f8d6..0610d3e33505 100644 --- a/tests/acp/test_auth.py +++ b/tests/acp/test_auth.py @@ -1,6 +1,11 @@ """Tests for acp_adapter.auth โ€” provider detection.""" -from acp_adapter.auth import has_provider, detect_provider +from acp_adapter.auth import ( + TERMINAL_SETUP_AUTH_METHOD_ID, + build_auth_methods, + has_provider, + detect_provider, +) class TestHasProvider: @@ -54,3 +59,44 @@ def _boom(): monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _boom) assert detect_provider() is None + + def test_detect_provider_strips_and_lowercases_provider(self, monkeypatch): + monkeypatch.setattr( + "hermes_cli.runtime_provider.resolve_runtime_provider", + lambda: {"provider": " OpenRouter ", "api_key": " sk-or-test "}, + ) 
+ assert detect_provider() == "openrouter" + + +class TestBuildAuthMethods: + def test_build_auth_methods_returns_provider_and_terminal_when_configured(self, monkeypatch): + monkeypatch.setattr("acp_adapter.auth.detect_provider", lambda: "openrouter") + + methods = build_auth_methods() + payloads = [method.model_dump(by_alias=True, exclude_none=True) for method in methods] + + assert payloads[0]["id"] == "openrouter" + assert payloads[0]["name"] == "openrouter runtime credentials" + assert any(payload["id"] == TERMINAL_SETUP_AUTH_METHOD_ID for payload in payloads) + terminal = next(payload for payload in payloads if payload["id"] == TERMINAL_SETUP_AUTH_METHOD_ID) + assert terminal["type"] == "terminal" + assert terminal["args"] == ["--setup"] + + def test_build_auth_methods_returns_terminal_setup_when_unconfigured(self, monkeypatch): + monkeypatch.setattr("acp_adapter.auth.detect_provider", lambda: None) + + methods = build_auth_methods() + payloads = [method.model_dump(by_alias=True, exclude_none=True) for method in methods] + + assert payloads == [ + { + "args": ["--setup"], + "description": ( + "Open Hermes' interactive model/provider setup in a terminal. " + "Use this when Hermes has not been configured on this machine yet." 
+ ), + "id": TERMINAL_SETUP_AUTH_METHOD_ID, + "name": "Configure Hermes provider", + "type": "terminal", + } + ] diff --git a/tests/acp/test_entry.py b/tests/acp/test_entry.py index 760522c312a9..81d30cd868c3 100644 --- a/tests/acp/test_entry.py +++ b/tests/acp/test_entry.py @@ -1,6 +1,9 @@ """Tests for acp_adapter.entry startup wiring.""" +import sys + import acp +import pytest from acp_adapter import entry @@ -15,6 +18,179 @@ async def fake_run_agent(agent, **kwargs): monkeypatch.setattr(entry, "_load_env", lambda: None) monkeypatch.setattr(acp, "run_agent", fake_run_agent) - entry.main() + entry.main([]) assert calls["kwargs"]["use_unstable_protocol"] is True + + +def test_main_version_prints_without_starting_server(monkeypatch, capsys): + monkeypatch.setattr(entry, "_setup_logging", lambda: (_ for _ in ()).throw(AssertionError("started server"))) + + entry.main(["--version"]) + + output = capsys.readouterr().out.strip() + assert output + assert "Starting hermes-agent ACP adapter" not in output + + +def test_main_check_prints_ok_without_starting_server(monkeypatch, capsys): + monkeypatch.setattr(entry, "_setup_logging", lambda: (_ for _ in ()).throw(AssertionError("started server"))) + + entry.main(["--check"]) + + assert capsys.readouterr().out.strip() == "Hermes ACP check OK" + + +def test_main_setup_runs_model_configuration(monkeypatch): + calls = {} + + def fake_hermes_main(): + calls["argv"] = sys.argv[:] + + monkeypatch.setattr("hermes_cli.main.main", fake_hermes_main) + # Pretend stdin is not a TTY so the follow-up browser prompt is skipped. + # That keeps this test focused on the model-setup wiring; the + # browser-prompt path has its own test below. 
+ monkeypatch.setattr("sys.stdin.isatty", lambda: False) + + entry.main(["--setup"]) + + assert calls["argv"][1:] == ["model"] + + +def test_main_setup_offers_browser_install_when_tty(monkeypatch): + """When stdin is a TTY and the user answers yes, model setup is followed + by a browser-tools bootstrap call.""" + monkeypatch.setattr("hermes_cli.main.main", lambda: None) + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("builtins.input", lambda *_args, **_kwargs: "y") + + bootstrap_calls = [] + monkeypatch.setattr( + entry, + "_run_setup_browser", + lambda assume_yes=False: bootstrap_calls.append(assume_yes) or 0, + ) + + entry.main(["--setup"]) + + assert bootstrap_calls == [False] + + +def test_main_setup_skips_browser_prompt_on_no(monkeypatch): + monkeypatch.setattr("hermes_cli.main.main", lambda: None) + monkeypatch.setattr("sys.stdin.isatty", lambda: True) + monkeypatch.setattr("builtins.input", lambda *_args, **_kwargs: "") + + called = [] + monkeypatch.setattr( + entry, + "_run_setup_browser", + lambda assume_yes=False: called.append(assume_yes) or 0, + ) + + entry.main(["--setup"]) + + assert called == [] + + +def test_main_setup_browser_invokes_bundled_script(monkeypatch): + """`hermes-acp --setup-browser` must shell out to the bundled bootstrap + script โ€” never reimplement the install logic inline.""" + monkeypatch.setattr("platform.system", lambda: "Linux") + + captured = {} + + def fake_run(cmd, check=False): + captured["cmd"] = cmd + + class _R: + returncode = 0 + + return _R() + + monkeypatch.setattr("subprocess.run", fake_run) + + entry.main(["--setup-browser"]) + + assert captured["cmd"][0] == "bash" + assert captured["cmd"][1].endswith("bootstrap_browser_tools.sh") + # --yes is NOT passed when the flag is absent. 
+ assert "--yes" not in captured["cmd"] + + +def test_main_setup_browser_forwards_yes_flag(monkeypatch): + monkeypatch.setattr("platform.system", lambda: "Linux") + + captured = {} + + def fake_run(cmd, check=False): + captured["cmd"] = cmd + + class _R: + returncode = 0 + + return _R() + + monkeypatch.setattr("subprocess.run", fake_run) + + entry.main(["--setup-browser", "--yes"]) + + assert "--yes" in captured["cmd"] + + +def test_main_setup_browser_uses_powershell_on_windows(monkeypatch): + monkeypatch.setattr("platform.system", lambda: "Windows") + + captured = {} + + def fake_run(cmd, check=False): + captured["cmd"] = cmd + + class _R: + returncode = 0 + + return _R() + + monkeypatch.setattr("subprocess.run", fake_run) + + entry.main(["--setup-browser", "--yes"]) + + assert captured["cmd"][0] == "powershell.exe" + assert any(part.endswith("bootstrap_browser_tools.ps1") for part in captured["cmd"]) + assert "-Yes" in captured["cmd"] + + +def test_main_setup_browser_propagates_failure(monkeypatch): + monkeypatch.setattr("platform.system", lambda: "Linux") + + class _R: + returncode = 7 + + monkeypatch.setattr("subprocess.run", lambda cmd, check=False: _R()) + + with pytest.raises(SystemExit) as excinfo: + entry.main(["--setup-browser"]) + assert excinfo.value.code == 7 + + +def test_bootstrap_scripts_ship_with_package(): + """The package-data wiring (pyproject.toml) must include the bootstrap + scripts โ€” otherwise `--setup-browser` 404s at runtime.""" + from pathlib import Path + + bootstrap_dir = Path(entry.__file__).resolve().parent / "bootstrap" + sh = bootstrap_dir / "bootstrap_browser_tools.sh" + ps1 = bootstrap_dir / "bootstrap_browser_tools.ps1" + + assert sh.is_file(), f"missing bundled script: {sh}" + assert ps1.is_file(), f"missing bundled script: {ps1}" + + sh_text = sh.read_text(encoding="utf-8") + ps1_text = ps1.read_text(encoding="utf-8") + + # Sanity: scripts know how to find the Hermes-managed Node prefix. 
+ assert "HERMES_HOME" in sh_text + assert "agent-browser" in sh_text + assert "HermesHome" in ps1_text + assert "agent-browser" in ps1_text diff --git a/tests/acp/test_registry_manifest.py b/tests/acp/test_registry_manifest.py new file mode 100644 index 000000000000..633b4a8494c0 --- /dev/null +++ b/tests/acp/test_registry_manifest.py @@ -0,0 +1,90 @@ +"""Tests for ACP Registry metadata shipped with Hermes.""" + +from __future__ import annotations + +import json +import re +import tomllib +from pathlib import Path +import xml.etree.ElementTree as ET + +ROOT = Path(__file__).resolve().parents[2] +MANIFEST = ROOT / "acp_registry" / "agent.json" +ICON = ROOT / "acp_registry" / "icon.svg" +FORBIDDEN_MANIFEST_KEYS = {"schema_version", "display_name"} +ALLOWED_DISTRIBUTIONS = {"binary", "npx", "uvx"} + + +def _manifest() -> dict: + return json.loads(MANIFEST.read_text(encoding="utf-8")) + + +def _pyproject_version() -> str: + data = tomllib.loads((ROOT / "pyproject.toml").read_text(encoding="utf-8")) + return data["project"]["version"] + + +def test_agent_json_matches_official_registry_required_fields(): + data = _manifest() + + assert FORBIDDEN_MANIFEST_KEYS.isdisjoint(data) + assert data["id"] == "hermes-agent" + assert re.fullmatch(r"[a-z][a-z0-9-]*", data["id"]) + assert data["name"] == "Hermes Agent" + assert data["description"] + assert data["repository"] == "https://github.com/NousResearch/hermes-agent" + assert data["website"].startswith("https://hermes-agent.nousresearch.com/") + assert data["authors"] == ["Nous Research"] + assert data["license"] == "MIT" + assert set(data["distribution"]) <= ALLOWED_DISTRIBUTIONS + + +def test_agent_json_uses_uvx_distribution_without_local_command_fields(): + data = _manifest() + + assert set(data["distribution"]) == {"uvx"} + uvx = data["distribution"]["uvx"] + # Schema allows {package, args, env}; we use {package, args}. 
+ assert set(uvx) <= {"package", "args", "env"} + assert "package" in uvx + assert uvx["package"] == f"hermes-agent[acp]=={data['version']}" + assert uvx["args"] == ["hermes-acp"] + # Old command-shape fields must not leak back in. + assert "type" not in data["distribution"] + assert "command" not in data["distribution"] + + +def test_agent_json_version_matches_pyproject(): + assert _manifest()["version"] == _pyproject_version() + + +def test_agent_json_pins_uvx_package_to_pyproject_version(): + """The registry CI rejects ``@latest`` and floating pins; the manifest must + always reference the exact PyPI version listed in pyproject.toml.""" + assert _manifest()["distribution"]["uvx"]["package"] == ( + f"hermes-agent[acp]=={_pyproject_version()}" + ) + + +def test_icon_svg_is_16x16_current_color(): + root = ET.fromstring(ICON.read_text(encoding="utf-8")) + + assert root.attrib["viewBox"] == "0 0 16 16" + assert root.attrib["width"] == "16" + assert root.attrib["height"] == "16" + + +def test_icon_svg_has_no_hardcoded_colors_or_gradients(): + text = ICON.read_text(encoding="utf-8") + + assert "linearGradient" not in text + assert "radialGradient" not in text + assert "url(#" not in text + assert not re.search(r"#[0-9a-fA-F]{3,8}\b", text) + + root = ET.fromstring(text) + for element in root.iter(): + for attr in ("fill", "stroke"): + value = element.attrib.get(attr) + if value is not None: + assert value in {"currentColor", "none"} diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index a4dad4aefa87..6e2039d2b24c 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -33,6 +33,7 @@ UsageUpdate, UserMessageChunk, ) +from acp_adapter.auth import TERMINAL_SETUP_AUTH_METHOD_ID from acp_adapter.server import HermesACPAgent, HERMES_VERSION from acp_adapter.session import SessionManager from hermes_state import SessionDB @@ -92,6 +93,41 @@ async def test_initialize_capabilities_wire_format(self, agent): assert "list" in session_caps assert 
"resume" in session_caps + @pytest.mark.asyncio + async def test_initialize_advertises_provider_and_terminal_auth_methods(self, agent, monkeypatch): + monkeypatch.setattr("acp_adapter.auth.detect_provider", lambda: "openrouter") + monkeypatch.setattr("acp_adapter.server.detect_provider", lambda: "openrouter") + + resp = await agent.initialize(protocol_version=1) + payloads = [method.model_dump(by_alias=True, exclude_none=True) for method in resp.auth_methods] + + assert payloads[0]["id"] == "openrouter" + assert payloads[0]["name"] == "openrouter runtime credentials" + terminal = next(payload for payload in payloads if payload["id"] == TERMINAL_SETUP_AUTH_METHOD_ID) + assert terminal["type"] == "terminal" + assert terminal["args"] == ["--setup"] + + @pytest.mark.asyncio + async def test_initialize_advertises_terminal_setup_auth_when_no_provider(self, agent, monkeypatch): + monkeypatch.setattr("acp_adapter.auth.detect_provider", lambda: None) + monkeypatch.setattr("acp_adapter.server.detect_provider", lambda: None) + + resp = await agent.initialize(protocol_version=1) + payloads = [method.model_dump(by_alias=True, exclude_none=True) for method in resp.auth_methods] + + assert payloads == [ + { + "args": ["--setup"], + "description": ( + "Open Hermes' interactive model/provider setup in a terminal. " + "Use this when Hermes has not been configured on this machine yet." 
+ ), + "id": TERMINAL_SETUP_AUTH_METHOD_ID, + "name": "Configure Hermes provider", + "type": "terminal", + } + ] + # --------------------------------------------------------------------------- # authenticate @@ -135,6 +171,24 @@ async def test_authenticate_without_provider(self, agent, monkeypatch): resp = await agent.authenticate(method_id="openrouter") assert resp is None + @pytest.mark.asyncio + async def test_authenticate_accepts_terminal_setup_after_provider_configured(self, agent, monkeypatch): + monkeypatch.setattr( + "acp_adapter.server.detect_provider", + lambda: "openrouter", + ) + resp = await agent.authenticate(method_id=TERMINAL_SETUP_AUTH_METHOD_ID) + assert isinstance(resp, AuthenticateResponse) + + @pytest.mark.asyncio + async def test_authenticate_rejects_terminal_setup_without_provider(self, agent, monkeypatch): + monkeypatch.setattr( + "acp_adapter.server.detect_provider", + lambda: None, + ) + resp = await agent.authenticate(method_id=TERMINAL_SETUP_AUTH_METHOD_ID) + assert resp is None + # --------------------------------------------------------------------------- # new_session / cancel / load / resume diff --git a/tests/agent/lsp/test_delta_key.py b/tests/agent/lsp/test_delta_key.py new file mode 100644 index 000000000000..d20eef1ee727 --- /dev/null +++ b/tests/agent/lsp/test_delta_key.py @@ -0,0 +1,262 @@ +"""Tests for cross-edit LSP delta filtering. + +The delta-filter contract spans three pieces: + + 1. ``agent.lsp.manager._diag_key`` โ€” strict equality key including + the diagnostic's position range. Two diagnostics with the same + content but different lines are NOT equal under this key (they + are genuinely different diagnostics). + 2. ``agent.lsp.range_shift.build_line_shift`` โ€” derives a function + mapping pre-edit line numbers to post-edit line numbers from a + pre/post text pair. + 3. 
``agent.lsp.manager.LSPService.get_diagnostics_sync(line_shift=โ€ฆ)`` + โ€” applies the shift to baseline diagnostics before computing the + set-difference, so pre-existing errors at shifted lines hash + equal to their post-edit counterparts and get filtered out. + +These tests exercise the contract at the unit level; the E2E case +(real LSP server, real shift) is covered in test_service.py. +""" +from __future__ import annotations + +from agent.lsp.client import _diagnostic_key +from agent.lsp.manager import _diag_key +from agent.lsp.range_shift import ( + build_line_shift, + shift_baseline, + shift_diagnostic_range, +) + + +def _diag(*, line: int, message: str = "Undefined variable", + severity: int = 1, code: str = "reportUndefinedVariable", + source: str = "Pyright", end_line: int | None = None) -> dict: + if end_line is None: + end_line = line + return { + "severity": severity, + "code": code, + "source": source, + "message": message, + "range": { + "start": {"line": line, "character": 0}, + "end": {"line": end_line, "character": 10}, + }, + } + + +# ---------------------------------------------------------------------- +# _diag_key: strict equality (with range) +# ---------------------------------------------------------------------- + +def test_diag_key_treats_shifted_diagnostics_as_distinct(): + """Two diagnostics with the same message but at different lines hash + differently โ€” they are genuinely different diagnostics. The shift + map is what makes them equal AFTER remapping; the key itself stays + strict.""" + a = _diag(line=100) + b = _diag(line=200) + assert _diag_key(a) != _diag_key(b) + + +def test_diag_key_matches_client_key_for_shifted_baseline(): + """When a baseline diagnostic is remapped through a shift, its + _diag_key must match the corresponding post-edit diagnostic's key + at the same coordinates. 
This is the contract the delta filter + relies on.""" + pre = _diag(line=200) + # Edit deletes 14 lines above line 200, so the same error now + # appears at line 186 post-edit. + shift = lambda L: L - 14 if L >= 14 else L + shifted = shift_diagnostic_range(pre, shift) + assert shifted is not None + post = _diag(line=186) + assert _diag_key(shifted) == _diag_key(post) + + +def test_diag_key_distinguishes_message(): + a = _diag(line=100, message="foo") + b = _diag(line=100, message="bar") + assert _diag_key(a) != _diag_key(b) + + +def test_diag_key_distinguishes_severity(): + a = _diag(line=100, severity=1) + b = _diag(line=100, severity=2) + assert _diag_key(a) != _diag_key(b) + + +def test_diag_key_distinguishes_source(): + a = _diag(line=100, source="Pyright") + b = _diag(line=100, source="Ruff") + assert _diag_key(a) != _diag_key(b) + + +def test_diag_key_matches_client_key_byte_for_byte(): + """The manager-side and client-side keys must agree on diagnostic + identity — they're used by two layers that need to round-trip the + same diagnostics through dedup and delta filtering.""" + d = _diag(line=42) + assert _diag_key(d) == _diagnostic_key(d) + + +# ---------------------------------------------------------------------- +# build_line_shift +# ---------------------------------------------------------------------- + +def test_shift_identity_for_identical_content(): + shift = build_line_shift("a\nb\nc\n", "a\nb\nc\n") + assert shift(0) == 0 + assert shift(1) == 1 + assert shift(2) == 2 + + +def test_shift_pure_deletion_above_line(): + """Delete 2 lines at the top; everything below shifts up by 2.""" + pre = "line0\nline1\nline2\nline3\nline4\n" + post = "line2\nline3\nline4\n" # deleted lines 0-1 + shift = build_line_shift(pre, post) + # Pre lines 0,1 → deleted → None + assert shift(0) is None + assert shift(1) is None + # Pre line 2 → post line 0 + assert shift(2) == 0 + # Pre line 4 → post line 2 + assert shift(4) == 2 + + +def 
test_shift_pure_insertion_above_line(): + """Insert 3 lines at the top; everything below shifts down by 3.""" + pre = "line0\nline1\nline2\n" + post = "new0\nnew1\nnew2\nline0\nline1\nline2\n" + shift = build_line_shift(pre, post) + # Pre lines unchanged in identity, shifted by 3 + assert shift(0) == 3 + assert shift(1) == 4 + assert shift(2) == 5 + + +def test_shift_replacement_in_middle(): + """Replace 2 lines in the middle with 1 line. Lines above + unchanged; lines below shift up by 1.""" + pre = "a\nb\nc\nd\ne\n" + post = "a\nb\nX\ne\n" # replaced lines 2,3 (c,d) with X + shift = build_line_shift(pre, post) + assert shift(0) == 0 # a → a + assert shift(1) == 1 # b → b + assert shift(2) is None # c → deleted + assert shift(3) is None # d → deleted + assert shift(4) == 3 # e → post line 3 + + +def test_shift_handles_empty_pre(): + """First write of a file: pre is empty, post has content. Nothing + to shift, so the function should be well-defined for empty pre.""" + shift = build_line_shift("", "hello\nworld\n") + # Any pre line falls past the end of an empty pre — anchor at end of post + assert shift(0) == 1 + + +def test_shift_handles_empty_post(): + """File deleted to empty. 
Every pre line returns None.""" + shift = build_line_shift("line0\nline1\n", "") + assert shift(0) is None + assert shift(1) is None + + +# ---------------------------------------------------------------------- +# shift_diagnostic_range +# ---------------------------------------------------------------------- + +def test_shift_diag_remaps_start_and_end(): + pre = "a\nb\nc\nd\n" + post = "X\na\nb\nc\nd\n" # one line inserted at top + shift = build_line_shift(pre, post) + d = _diag(line=2, end_line=2) + remapped = shift_diagnostic_range(d, shift) + assert remapped is not None + assert remapped["range"]["start"]["line"] == 3 + assert remapped["range"]["end"]["line"] == 3 + + +def test_shift_diag_drops_diagnostic_in_deleted_region(): + pre = "a\nb\nc\nd\n" + post = "a\nd\n" # deleted lines 1,2 (b,c) + shift = build_line_shift(pre, post) + d = _diag(line=1) + assert shift_diagnostic_range(d, shift) is None + + +def test_shift_diag_does_not_mutate_original(): + pre = "a\nb\n" + post = "X\na\nb\n" + shift = build_line_shift(pre, post) + d = _diag(line=0) + original_line = d["range"]["start"]["line"] + _ = shift_diagnostic_range(d, shift) + assert d["range"]["start"]["line"] == original_line + + +def test_shift_baseline_drops_deleted_and_remaps_rest(): + pre = "a\nb\nc\nd\ne\n" + post = "a\ne\n" # deleted b,c,d + shift = build_line_shift(pre, post) + baseline = [ + _diag(line=0, message="err on a"), + _diag(line=1, message="err on b"), # → deleted + _diag(line=2, message="err on c"), # → deleted + _diag(line=4, message="err on e"), + ] + out = shift_baseline(baseline, shift) + assert [d["message"] for d in out] == ["err on a", "err on e"] + assert out[0]["range"]["start"]["line"] == 0 + assert out[1]["range"]["start"]["line"] == 1 + + +# ---------------------------------------------------------------------- +# End-to-end: simulate the delta-filter pipeline +# ---------------------------------------------------------------------- + +def 
test_pipeline_filters_shifted_baseline_under_strict_key(): + """The exact scenario the bug fix is for: an edit deletes lines, + every diagnostic below shifts, and the delta filter (strict key + + shifted baseline) correctly identifies them as pre-existing.""" + pre = "line0\nline1\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\n" + # Delete lines 2,3,4 — pre-existing errors at lines 7,8 should + # appear at lines 4,5 post-edit and be filtered out. + post = "line0\nline1\nline5\nline6\nline7\nline8\nline9\n" + shift = build_line_shift(pre, post) + + baseline = [_diag(line=7, message="X"), _diag(line=8, message="Y")] + post_diags = [_diag(line=4, message="X"), _diag(line=5, message="Y")] + + shifted_baseline = shift_baseline(baseline, shift) + seen = {_diag_key(d) for d in shifted_baseline} + new_diags = [d for d in post_diags if _diag_key(d) not in seen] + + # Both errors were pre-existing — filtered out. + assert new_diags == [] + + +def test_pipeline_preserves_new_instance_at_different_line(): + """The case content-only keys would miss: the model introduces a + SECOND instance of the same error class at a new location. The + new instance must surface.""" + pre = "good\ngood\ngood\n" + post = "good\nbad\ngood\nbad\n" # added 2 new error lines + shift = build_line_shift(pre, post) + + baseline = [_diag(line=0, message="bad style")] # pre-existing + post_diags = [ + _diag(line=0, message="bad style"), # pre-existing + _diag(line=1, message="bad style"), # NEW — different line + _diag(line=3, message="bad style"), # NEW — different line + ] + + shifted_baseline = shift_baseline(baseline, shift) + seen = {_diag_key(d) for d in shifted_baseline} + new_diags = [d for d in post_diags if _diag_key(d) not in seen] + + # Two genuinely new instances must be surfaced. 
+ assert len(new_diags) == 2 + assert {d["range"]["start"]["line"] for d in new_diags} == {1, 3} diff --git a/tests/agent/lsp/test_service.py b/tests/agent/lsp/test_service.py index 6eed8f7fd993..952a8519adcd 100644 --- a/tests/agent/lsp/test_service.py +++ b/tests/agent/lsp/test_service.py @@ -130,6 +130,35 @@ def test_service_e2e_delta_filter(mock_pyright): svc.shutdown() +def test_service_e2e_delta_filter_with_line_shift(mock_pyright): + """End-to-end: an edit that shifts the diagnostic's line still + filters correctly when ``line_shift`` is supplied. + + The mock LSP server emits a fixed error at line 0; for this test + we don't need to actually shift the server's output — we just + need to prove that supplying a line_shift through the API works + and doesn't break the existing delta path. The unit tests in + test_delta_key.py cover the shift semantics in detail. + """ + repo = mock_pyright + f = repo / "x.py" + f.write_text("print('hi')\n") + + svc = LSPService( + enabled=True, + wait_mode="document", + wait_timeout=3.0, + install_strategy="manual", + ) + try: + svc.snapshot_baseline(str(f)) + # Identity shift — should behave exactly like no shift. 
+ new_diags = svc.get_diagnostics_sync(str(f), line_shift=lambda L: L) + assert new_diags == [] + finally: + svc.shutdown() + + def test_service_status_includes_clients(mock_pyright): repo = mock_pyright f = repo / "x.py" diff --git a/tests/agent/test_context_compressor_summary_continuity.py b/tests/agent/test_context_compressor_summary_continuity.py index d9a273758347..d797b661f01e 100644 --- a/tests/agent/test_context_compressor_summary_continuity.py +++ b/tests/agent/test_context_compressor_summary_continuity.py @@ -27,10 +27,12 @@ def _messages_with_handoff(summary_body: str): return [ {"role": "system", "content": "system prompt"}, {"role": "user", "content": f"{SUMMARY_PREFIX}\n{summary_body}"}, + {"role": "assistant", "content": "handoff acknowledged after resume"}, {"role": "user", "content": "new user turn after resume"}, {"role": "assistant", "content": "new assistant work after resume"}, {"role": "user", "content": "more new work after resume"}, {"role": "assistant", "content": "latest tail response"}, + {"role": "user", "content": "final active request stays in protected tail"}, ] diff --git a/tests/cli/test_cli_init.py b/tests/cli/test_cli_init.py index ee5ffb390d13..8417d64e746a 100644 --- a/tests/cli/test_cli_init.py +++ b/tests/cli/test_cli_init.py @@ -319,6 +319,89 @@ def test_resume_without_target_lists_recent_sessions(self, capsys): assert "Checking Running Hermes Agent" in output assert "Use /resume to continue" in output + def test_sessions_command_no_args_lists_recent_sessions(self, capsys): + """/sessions with no args prints the recent-sessions table (TUI parity). + + Regression test: `sessions` was registered in the central command + registry and surfaced by /help and tab-completion, but the classic + CLI dispatcher had no elif branch for it, so the canonical name fell + through and printed `Unknown command: sessions`. 
+ """ + cli = _make_cli() + cli.session_id = "current" + cli._session_db = MagicMock() + cli._session_db.list_sessions_rich.return_value = [ + { + "id": "20260401_201329_d85961", + "title": "Checking Running Hermes Agent", + "preview": "check running gateways for hermes agent", + "last_active": 0, + }, + ] + + # Drive it through the public dispatcher to also lock in the + # process_command wiring, not just the handler in isolation. + cli.process_command("/sessions") + output = capsys.readouterr().out + + assert "Unknown command" not in output + assert "Recent sessions" in output + assert "Checking Running Hermes Agent" in output + assert "20260401_201329_d85961" in output + + def test_sessions_list_subcommand_lists_recent_sessions(self, capsys): + """/sessions list is an explicit alias for the no-arg list view.""" + cli = _make_cli() + cli.session_id = "current" + cli._session_db = MagicMock() + cli._session_db.list_sessions_rich.return_value = [ + { + "id": "20260401_201329_d85961", + "title": "Checking Running Hermes Agent", + "preview": "check running gateways for hermes agent", + "last_active": 0, + }, + ] + + cli.process_command("/sessions list") + output = capsys.readouterr().out + + assert "Unknown command" not in output + assert "Recent sessions" in output + assert "Checking Running Hermes Agent" in output + + def test_sessions_with_target_delegates_to_resume(self): + """/sessions behaves identically to /resume . + + We intercept `_handle_resume_command` rather than the full resume + machinery (which would otherwise require simulating an entire session + switch). The contract under test is the dispatch wiring. 
+ """ + cli = _make_cli() + with patch.object(cli, "_handle_resume_command") as mock_resume: + cli.process_command("/sessions Checking Running Hermes Agent") + + mock_resume.assert_called_once_with( + "/resume Checking Running Hermes Agent" + ) + + def test_sessions_command_is_dispatched(self): + """/sessions must hit _handle_sessions_command, not fall through. + + Direct test that the process_command elif chain routes the canonical + name to the handler. Without this wiring, /sessions printed + `Unknown command: sessions` even though it was a registered command. + """ + cli = _make_cli() + cli._session_db = None # exercise the no-db path too + + with patch.object(cli, "_handle_sessions_command") as mock_handler: + cli.process_command("/sessions") + + mock_handler.assert_called_once() + called_with = mock_handler.call_args.args[0] + assert called_with.lower().startswith("/sessions") + class TestRootLevelProviderOverride: """Root-level provider/base_url in config.yaml must NOT override model.provider.""" diff --git a/tests/cli/test_cli_light_mode.py b/tests/cli/test_cli_light_mode.py new file mode 100644 index 000000000000..bc5ca5128e05 --- /dev/null +++ b/tests/cli/test_cli_light_mode.py @@ -0,0 +1,154 @@ +"""Tests for the light-mode terminal detection + color remap in cli.py. + +Covers the env-override path and the SkinConfig.get_color() wrapper that +the resize / light-mode salvage installs at module import time. We don't +try to fake an OSC 11 reply — the env-override branch short-circuits +before the terminal query, which is the path most users hit. +""" + +from __future__ import annotations + +import importlib + +import pytest + + +@pytest.fixture +def cli_mod(monkeypatch): + """Import cli with the light-mode cache cleared each test.""" + import cli as _cli + + # The module-level _install_skin_light_mode_hook() and import-time + # _detect_light_mode() prime ran once at first import. 
We just reset + # the detection cache so the per-test env override takes effect. + monkeypatch.setattr(_cli, "_LIGHT_MODE_CACHE", None) + return _cli + + +class TestLightModeDetection: + def test_hermes_light_env_true_forces_light(self, cli_mod, monkeypatch): + monkeypatch.setenv("HERMES_LIGHT", "1") + assert cli_mod._detect_light_mode() is True + + def test_hermes_light_env_false_forces_dark(self, cli_mod, monkeypatch): + monkeypatch.setenv("HERMES_LIGHT", "0") + # Also blank out other signals so nothing else flips it light. + monkeypatch.delenv("HERMES_TUI_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_THEME", raising=False) + monkeypatch.delenv("HERMES_TUI_BACKGROUND", raising=False) + monkeypatch.delenv("COLORFGBG", raising=False) + assert cli_mod._detect_light_mode() is False + + def test_theme_hint_light(self, cli_mod, monkeypatch): + monkeypatch.delenv("HERMES_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_LIGHT", raising=False) + monkeypatch.setenv("HERMES_TUI_THEME", "light") + assert cli_mod._detect_light_mode() is True + + def test_background_hex_hint_light(self, cli_mod, monkeypatch): + monkeypatch.delenv("HERMES_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_THEME", raising=False) + monkeypatch.setenv("HERMES_TUI_BACKGROUND", "#FFFFFF") + assert cli_mod._detect_light_mode() is True + + def test_background_hex_hint_dark(self, cli_mod, monkeypatch): + monkeypatch.delenv("HERMES_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_THEME", raising=False) + monkeypatch.setenv("HERMES_TUI_BACKGROUND", "#1a1a2e") + monkeypatch.delenv("COLORFGBG", raising=False) + assert cli_mod._detect_light_mode() is False + + def test_colorfgbg_light_bg_slot(self, cli_mod, monkeypatch): + monkeypatch.delenv("HERMES_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_LIGHT", raising=False) + monkeypatch.delenv("HERMES_TUI_THEME", 
raising=False) + monkeypatch.delenv("HERMES_TUI_BACKGROUND", raising=False) + monkeypatch.setenv("COLORFGBG", "0;15") # bg slot 15 = light + assert cli_mod._detect_light_mode() is True + + def test_cache_is_sticky(self, cli_mod, monkeypatch): + monkeypatch.setenv("HERMES_LIGHT", "1") + assert cli_mod._detect_light_mode() is True + # Even if the env flips, the cached result wins until reset. + monkeypatch.setenv("HERMES_LIGHT", "0") + assert cli_mod._detect_light_mode() is True + + +class TestLightModeRemap: + def test_remap_no_op_in_dark_mode(self, cli_mod, monkeypatch): + monkeypatch.setenv("HERMES_LIGHT", "0") + # Cache is None from the fixture; first call sticks at False. + assert cli_mod._maybe_remap_for_light_mode("#FFF8DC") == "#FFF8DC" + + def test_remap_known_dark_color(self, cli_mod, monkeypatch): + monkeypatch.setenv("HERMES_LIGHT", "1") + # Force the detect cache to True for this test. + cli_mod._LIGHT_MODE_CACHE = True + assert cli_mod._maybe_remap_for_light_mode("#FFF8DC") == "#1A1A1A" + assert cli_mod._maybe_remap_for_light_mode("#FFD700") == "#9A6B00" + + def test_remap_case_insensitive(self, cli_mod, monkeypatch): + cli_mod._LIGHT_MODE_CACHE = True + # Lowercase input should still remap. + assert cli_mod._maybe_remap_for_light_mode("#fff8dc") == "#1A1A1A" + + def test_remap_unknown_color_passthrough(self, cli_mod, monkeypatch): + cli_mod._LIGHT_MODE_CACHE = True + # A color not in the remap table is returned unchanged. + assert cli_mod._maybe_remap_for_light_mode("#ABCDEF") == "#ABCDEF" + + def test_remap_skips_statusbar_paired_colors(self, cli_mod, monkeypatch): + """Colors that live on a dark bg (status bar fg) MUST NOT be + remapped — otherwise they go dark-on-dark and disappear. + + Regression guard for the patch-11 fix (intentional table omission). 
+ """ + cli_mod._LIGHT_MODE_CACHE = True + for fg in ("#C0C0C0", "#888888", "#555555", "#8B8682"): + assert cli_mod._maybe_remap_for_light_mode(fg) == fg, ( + f"{fg} is a status-bar fg paired with dark bg; remapping it " + "would produce dark-on-dark" + ) + + +class TestSkinConfigHook: + """The salvage wraps SkinConfig.get_color at module import time so + every skin color read goes through the light-mode remap. Verify + the hook installed and functions correctly. + """ + + def test_hook_installed(self, cli_mod): + from hermes_cli.skin_engine import SkinConfig + + assert getattr(SkinConfig, "_hermes_light_mode_hook_installed", False) is True + + def test_hook_is_idempotent(self, cli_mod): + # Calling the installer twice must not double-wrap (the marker + # attribute is the guard). + from hermes_cli.skin_engine import SkinConfig + + before = SkinConfig.get_color + cli_mod._install_skin_light_mode_hook() + after = SkinConfig.get_color + assert before is after + + def test_skin_color_remaps_through_wrapper_in_light_mode(self, cli_mod, monkeypatch): + from hermes_cli.skin_engine import SkinConfig + + cli_mod._LIGHT_MODE_CACHE = True + skin = SkinConfig( + name="test", + colors={"banner_text": "#FFF8DC", "response_border": "#FFD700"}, + ) + # The wrapper kicks in at get_color, not at construction time. 
+ assert skin.get_color("banner_text") == "#1A1A1A" + assert skin.get_color("response_border") == "#9A6B00" + + def test_skin_color_passthrough_in_dark_mode(self, cli_mod, monkeypatch): + from hermes_cli.skin_engine import SkinConfig + + cli_mod._LIGHT_MODE_CACHE = False + skin = SkinConfig(name="test", colors={"banner_text": "#FFF8DC"}) + assert skin.get_color("banner_text") == "#FFF8DC" diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py index 445626fac9be..47bd68aa25d1 100644 --- a/tests/cli/test_cli_status_bar.py +++ b/tests/cli/test_cli_status_bar.py @@ -349,20 +349,27 @@ def test_input_rules_hide_after_resize_until_next_input(self): assert cli_obj._tui_input_rule_height("top", width=90) == 1 assert cli_obj._tui_input_rule_height("bottom", width=90) == 1 - def test_scrollback_box_width_caps_to_resize_safe_value(self): - """Decorative scrollback boxes clamp to a width small enough that - moderate terminal shrinks don't cause reflow into scrollback.""" + def test_scrollback_box_width_returns_viewport_width(self): + """Decorative scrollback boxes use the full viewport width. + + The previous clamp (max 56 cols) was reverted in favour of the + prompt_toolkit ``_output_screen_diff`` monkey-patch landed in + #26137, which keeps chrome out of scrollback at the source. + We accept that an aggressive column-shrink may visually reflow + already printed Panel borders — that's a cosmetic artifact of + stamped scrollback history, not a live-render bug. + """ from cli import HermesCLI - # Floor at 32 — narrow terminals still get something usable. + # Floor at 32 — narrow terminals still get something usable + # (avoids negative ``'─' * (w - 2)`` math). assert HermesCLI._scrollback_box_width(20) == 32 assert HermesCLI._scrollback_box_width(32) == 32 - # Cap at 56 — wide terminals don't get full-width boxes. 
- assert HermesCLI._scrollback_box_width(80) == 56 - assert HermesCLI._scrollback_box_width(120) == 56 - assert HermesCLI._scrollback_box_width(200) == 56 - # Mid-range passes through up to the cap. + # Above the floor, return the actual viewport width — no cap. assert HermesCLI._scrollback_box_width(48) == 48 + assert HermesCLI._scrollback_box_width(80) == 80 + assert HermesCLI._scrollback_box_width(120) == 120 + assert HermesCLI._scrollback_box_width(200) == 200 def test_agent_spacer_reclaimed_on_narrow_terminals(self): cli_obj = _make_cli() diff --git a/tests/conftest.py b/tests/conftest.py index 5d7f197f195f..aa2b1b1fbcb9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -101,7 +101,6 @@ "RETAINDB_API_KEY", "HINDSIGHT_API_KEY", "HINDSIGHT_LLM_API_KEY", - "TINKER_API_KEY", "DAYTONA_API_KEY", "TWILIO_AUTH_TOKEN", "TELEGRAM_BOT_TOKEN", @@ -476,12 +475,14 @@ def _reset_module_state(): except Exception: pass - # --- agent.auxiliary_client — runtime main provider/model override --- - # Set per-turn by AIAgent.run_conversation; tests that import it must - # see a clean state so config.yaml fallback works as expected. + # --- agent.auxiliary_client — runtime main provider/model override and + # payment-error health cache. Both are process-global in production; + # reset them per test so one worker's fallback/402 test does not make + # later auxiliary-client tests skip otherwise-available providers. 
try: from agent import auxiliary_client as _aux_mod _aux_mod.clear_runtime_main() + _aux_mod._reset_aux_unhealthy_cache() except Exception: pass diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index af42ca444b26..16c56cd62200 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -321,6 +321,93 @@ def test_resume_reenables_job(self, tmp_cron_dir): assert resumed["paused_reason"] is None +class TestResolveJobRef: + """Name-based job lookup for CLI/tool callers (PR #2627, @buntingszn).""" + + def test_resolve_by_exact_id(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + job = create_job(prompt="A", schedule="1h", name="alpha") + assert resolve_job_ref(job["id"])["id"] == job["id"] + + def test_resolve_by_name(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + job = create_job(prompt="A", schedule="1h", name="alpha") + assert resolve_job_ref("alpha")["id"] == job["id"] + + def test_resolve_by_name_case_insensitive(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + job = create_job(prompt="A", schedule="1h", name="MyJob") + assert resolve_job_ref("myjob")["id"] == job["id"] + assert resolve_job_ref("MYJOB")["id"] == job["id"] + + def test_resolve_returns_none_when_not_found(self, tmp_cron_dir): + from cron.jobs import resolve_job_ref + + create_job(prompt="A", schedule="1h", name="alpha") + assert resolve_job_ref("does-not-exist") is None + assert resolve_job_ref("") is None + + def test_resolve_id_wins_over_name(self, tmp_cron_dir): + """If a job's name happens to equal another job's ID, ID match wins.""" + from cron.jobs import resolve_job_ref + + j1 = create_job(prompt="A", schedule="1h") + # Create a second job whose name is j1's ID + j2 = create_job(prompt="B", schedule="1h", name=j1["id"]) + # Looking up j1["id"] must return j1, not the colliding-name job j2 + assert resolve_job_ref(j1["id"])["id"] == j1["id"] + assert resolve_job_ref(j1["id"])["id"] != j2["id"] + + def 
test_resolve_ambiguous_name_raises(self, tmp_cron_dir): + """Two jobs sharing a name → refuse to pick, surface both IDs.""" + from cron.jobs import AmbiguousJobReference, resolve_job_ref + + j1 = create_job(prompt="A", schedule="1h", name="dup") + j2 = create_job(prompt="B", schedule="1h", name="dup") + with pytest.raises(AmbiguousJobReference) as exc_info: + resolve_job_ref("dup") + ids = {m["id"] for m in exc_info.value.matches} + assert ids == {j1["id"], j2["id"]} + # Error message mentions both IDs so the user can pick one + assert j1["id"] in str(exc_info.value) + assert j2["id"] in str(exc_info.value) + + def test_trigger_by_name(self, tmp_cron_dir): + from cron.jobs import trigger_job + + job = create_job(prompt="A", schedule="1h", name="alpha") + result = trigger_job("alpha") + assert result is not None + assert result["id"] == job["id"] + + def test_pause_by_name(self, tmp_cron_dir): + job = create_job(prompt="A", schedule="1h", name="alpha") + result = pause_job("alpha", reason="manual") + assert result is not None + assert result["id"] == job["id"] + assert result["state"] == "paused" + + def test_remove_by_name(self, tmp_cron_dir): + job = create_job(prompt="A", schedule="1h", name="alpha") + assert remove_job("alpha") is True + assert get_job(job["id"]) is None + + def test_mutations_refuse_ambiguous_name(self, tmp_cron_dir): + """pause/resume/trigger/remove must refuse to act on an ambiguous name.""" + from cron.jobs import AmbiguousJobReference, trigger_job + + create_job(prompt="A", schedule="1h", name="dup") + create_job(prompt="B", schedule="1h", name="dup") + for fn in (pause_job, resume_job, trigger_job): + with pytest.raises(AmbiguousJobReference): + fn("dup") + with pytest.raises(AmbiguousJobReference): + remove_job("dup") + + class TestMarkJobRun: def test_increments_completed(self, tmp_cron_dir): job = create_job(prompt="Test", schedule="every 1h") diff --git a/tests/environments/benchmarks/test_terminalbench2_env_security.py 
b/tests/environments/benchmarks/test_terminalbench2_env_security.py deleted file mode 100644 index b2610757762d..000000000000 --- a/tests/environments/benchmarks/test_terminalbench2_env_security.py +++ /dev/null @@ -1,164 +0,0 @@ -"""Security tests for Terminal-Bench 2 archive extraction.""" - -import base64 -import importlib -import io -import sys -import tarfile -import types - -import pytest - - -def _stub_module(name: str, **attrs): - module = types.ModuleType(name) - for key, value in attrs.items(): - setattr(module, key, value) - return module - - -def _load_terminalbench_module(monkeypatch): - class _EvalHandlingEnum: - STOP_TRAIN = "stop_train" - - class _APIServerConfig: - def __init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - - class _AgentResult: - pass - - class _HermesAgentLoop: - pass - - class _HermesAgentBaseEnv: - pass - - class _HermesAgentEnvConfig: - pass - - class _ToolContext: - pass - - stub_modules = { - "atroposlib": _stub_module("atroposlib"), - "atroposlib.envs": _stub_module("atroposlib.envs"), - "atroposlib.envs.base": _stub_module( - "atroposlib.envs.base", - EvalHandlingEnum=_EvalHandlingEnum, - ), - "atroposlib.envs.server_handling": _stub_module("atroposlib.envs.server_handling"), - "atroposlib.envs.server_handling.server_manager": _stub_module( - "atroposlib.envs.server_handling.server_manager", - APIServerConfig=_APIServerConfig, - ), - "environments.agent_loop": _stub_module( - "environments.agent_loop", - AgentResult=_AgentResult, - HermesAgentLoop=_HermesAgentLoop, - ), - "environments.hermes_base_env": _stub_module( - "environments.hermes_base_env", - HermesAgentBaseEnv=_HermesAgentBaseEnv, - HermesAgentEnvConfig=_HermesAgentEnvConfig, - ), - "environments.tool_context": _stub_module( - "environments.tool_context", - ToolContext=_ToolContext, - ), - "tools.terminal_tool": _stub_module( - "tools.terminal_tool", - register_task_env_overrides=lambda *args, **kwargs: None, - 
clear_task_env_overrides=lambda *args, **kwargs: None, - cleanup_vm=lambda *args, **kwargs: None, - ), - } - - stub_modules["atroposlib"].envs = stub_modules["atroposlib.envs"] - stub_modules["atroposlib.envs"].base = stub_modules["atroposlib.envs.base"] - stub_modules["atroposlib.envs"].server_handling = stub_modules["atroposlib.envs.server_handling"] - stub_modules["atroposlib.envs.server_handling"].server_manager = stub_modules[ - "atroposlib.envs.server_handling.server_manager" - ] - - for name, module in stub_modules.items(): - monkeypatch.setitem(sys.modules, name, module) - - module_name = "environments.benchmarks.terminalbench_2.terminalbench2_env" - sys.modules.pop(module_name, None) - return importlib.import_module(module_name) - - -def _build_tar_b64(entries): - buf = io.BytesIO() - with tarfile.open(fileobj=buf, mode="w:gz") as tar: - for entry in entries: - kind = entry["kind"] - info = tarfile.TarInfo(entry["name"]) - - if kind == "dir": - info.type = tarfile.DIRTYPE - tar.addfile(info) - continue - - if kind == "file": - data = entry["data"].encode("utf-8") - info.size = len(data) - tar.addfile(info, io.BytesIO(data)) - continue - - if kind == "symlink": - info.type = tarfile.SYMTYPE - info.linkname = entry["target"] - tar.addfile(info) - continue - - raise ValueError(f"Unknown tar entry kind: {kind}") - - return base64.b64encode(buf.getvalue()).decode("ascii") - - -def test_extract_base64_tar_allows_safe_files(tmp_path, monkeypatch): - module = _load_terminalbench_module(monkeypatch) - archive = _build_tar_b64( - [ - {"kind": "dir", "name": "nested"}, - {"kind": "file", "name": "nested/hello.txt", "data": "hello"}, - ] - ) - - target = tmp_path / "extract" - module._extract_base64_tar(archive, target) - - assert (target / "nested" / "hello.txt").read_text(encoding="utf-8") == "hello" - - -def test_extract_base64_tar_rejects_path_traversal(tmp_path, monkeypatch): - module = _load_terminalbench_module(monkeypatch) - archive = _build_tar_b64( - [ - 
{"kind": "file", "name": "../escape.txt", "data": "owned"}, - ] - ) - - target = tmp_path / "extract" - with pytest.raises(ValueError, match="Unsafe archive member path"): - module._extract_base64_tar(archive, target) - - assert not (tmp_path / "escape.txt").exists() - - -def test_extract_base64_tar_rejects_symlinks(tmp_path, monkeypatch): - module = _load_terminalbench_module(monkeypatch) - archive = _build_tar_b64( - [ - {"kind": "symlink", "name": "link", "target": "../../escape.txt"}, - ] - ) - - target = tmp_path / "extract" - with pytest.raises(ValueError, match="Unsupported archive member type"): - module._extract_base64_tar(archive, target) - - assert not (target / "link").exists() diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 66b304fff516..032af7109a5d 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -105,6 +105,29 @@ def test_delete_missing(self): store = ResponseStore(max_size=10) assert store.delete("resp_missing") is False + def test_delete_clears_conversation_mapping(self): + """Deleting a response also removes conversation mappings that reference it.""" + store = ResponseStore(max_size=10) + store.put("resp_1", {"output": "hello"}) + store.set_conversation("chat-a", "resp_1") + assert store.get_conversation("chat-a") == "resp_1" + store.delete("resp_1") + assert store.get_conversation("chat-a") is None + + def test_eviction_clears_conversation_mapping(self): + """LRU eviction also removes conversation mappings for evicted responses.""" + store = ResponseStore(max_size=2) + store.put("resp_1", {"output": "one"}) + store.set_conversation("chat-a", "resp_1") + store.put("resp_2", {"output": "two"}) + store.set_conversation("chat-b", "resp_2") + # Adding a 3rd should evict resp_1 and its conversation mapping + store.put("resp_3", {"output": "three"}) + assert store.get("resp_1") is None + assert store.get_conversation("chat-a") is None + # resp_2 mapping should still be 
intact + assert store.get_conversation("chat-b") == "resp_2" + # --------------------------------------------------------------------------- # _IdempotencyCache @@ -2870,6 +2893,45 @@ async def test_conversation_store_false_no_mapping(self, adapter): # Conversation mapping should NOT be set since store=false assert adapter._response_store.get_conversation("ephemeral-chat") is None + @pytest.mark.asyncio + async def test_conversation_reuse_after_eviction_no_404(self, adapter): + """After eviction clears a conversation mapping, reusing that name starts fresh (no 404).""" + adapter._response_store = ResponseStore(max_size=1) + app = _create_app(adapter) + async with TestClient(TestServer(app)) as cli: + with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run: + mock_run.return_value = ( + {"final_response": "First", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + # Create conversation -> resp stored + resp1 = await cli.post("/v1/responses", json={ + "input": "hello", + "conversation": "my-chat", + }) + assert resp1.status == 200 + + # Evict by adding another response + mock_run.return_value = ( + {"final_response": "Other", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + await cli.post("/v1/responses", json={"input": "other"}) + + # Conversation mapping should have been cleaned by eviction + assert adapter._response_store.get_conversation("my-chat") is None + + # Reuse conversation name — should start fresh, not 404 + mock_run.return_value = ( + {"final_response": "Restarted", "messages": [], "api_calls": 1}, + {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}, + ) + resp3 = await cli.post("/v1/responses", json={ + "input": "hello again", + "conversation": "my-chat", + }) + assert resp3.status == 200 + # --------------------------------------------------------------------------- # X-Hermes-Session-Id header (session 
continuity) diff --git a/tests/gateway/test_config.py b/tests/gateway/test_config.py index aae3c9e5880a..cf197bd6f7f5 100644 --- a/tests/gateway/test_config.py +++ b/tests/gateway/test_config.py @@ -409,6 +409,26 @@ def test_bridges_discord_channel_prompts_from_config_yaml(self, tmp_path, monkey "456": "Therapist mode", } + def test_bridges_discord_history_backfill_settings_from_config_yaml(self, tmp_path, monkeypatch): + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text( + "discord:\n" + " history_backfill: true\n" + " history_backfill_limit: 17\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("DISCORD_HISTORY_BACKFILL", raising=False) + monkeypatch.delenv("DISCORD_HISTORY_BACKFILL_LIMIT", raising=False) + + load_gateway_config() + + assert os.getenv("DISCORD_HISTORY_BACKFILL") == "true" + assert os.getenv("DISCORD_HISTORY_BACKFILL_LIMIT") == "17" + def test_bridges_telegram_channel_prompts_from_config_yaml(self, tmp_path, monkeypatch): hermes_home = tmp_path / ".hermes" hermes_home.mkdir() diff --git a/tests/gateway/test_discord_free_response.py b/tests/gateway/test_discord_free_response.py index 57198b9e73af..c69af3e7781c 100644 --- a/tests/gateway/test_discord_free_response.py +++ b/tests/gateway/test_discord_free_response.py @@ -62,6 +62,12 @@ def __init__(self, channel_id: int = 1, name: str = "general", guild_name: str = self.guild = SimpleNamespace(name=guild_name) self.topic = None + def history(self, *, limit, before, after=None, oldest_first=None): + async def _iter(): + return + yield + return _iter() + class FakeForumChannel: def __init__(self, channel_id: int = 1, name: str = "support-forum", guild_name: str = "Hermes Server"): @@ -81,6 +87,12 @@ def __init__(self, channel_id: int = 1, name: str = "thread", parent=None, guild self.guild = getattr(parent, "guild", None) or SimpleNamespace(name=guild_name) self.topic = None 
+ def history(self, *, limit, before, after=None, oldest_first=None): + async def _iter(): + return + yield + return _iter() + @pytest.fixture def adapter(monkeypatch): @@ -99,6 +111,9 @@ def adapter(monkeypatch): "DISCORD_NO_THREAD_CHANNELS", "DISCORD_ALLOWED_CHANNELS", "DISCORD_IGNORED_CHANNELS", + "DISCORD_HISTORY_BACKFILL", + "DISCORD_HISTORY_BACKFILL_LIMIT", + "DISCORD_ALLOW_BOTS", ): monkeypatch.delenv(_var, raising=False) @@ -125,6 +140,48 @@ def make_message(*, channel, content: str, mentions=None, msg_type=None): ) +def make_history_message( + *, + author, + content: str, + msg_id: int, + msg_type=None, + attachments=None, +): + return SimpleNamespace( + id=msg_id, + author=author, + content=content, + attachments=list(attachments or []), + type=msg_type if msg_type is not None else discord_platform.discord.MessageType.default, + ) + + +class FakeHistoryChannel(FakeTextChannel): + def __init__(self, history_messages, **kwargs): + super().__init__(**kwargs) + self._history_messages = list(history_messages) + + def history(self, *, limit, before, after=None, oldest_first=None): + before_id = int(getattr(before, "id", before)) + after_id = int(getattr(after, "id", after)) if after is not None else None + if oldest_first is None: + oldest_first = after is not None + + messages = [ + message for message in self._history_messages + if int(message.id) < before_id + and (after_id is None or int(message.id) > after_id) + ] + messages.sort(key=lambda message: int(message.id), reverse=not oldest_first) + + async def _iter(): + for message in messages[:limit]: + yield message + + return _iter() + + @pytest.mark.asyncio async def test_discord_defaults_to_require_mention(adapter, monkeypatch): """Default behavior: require @mention in server channels.""" @@ -578,3 +635,252 @@ async def test_discord_thread_require_mention_via_config_extra(adapter, monkeypa await adapter._handle_message(message) adapter.handle_message.assert_not_awaited() + + + +@pytest.mark.asyncio +async 
def test_fetch_channel_context_stops_at_self_message_and_reverses_to_chronological_order(adapter, monkeypatch): + monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all") + adapter.config.extra["history_backfill_limit"] = 10 + + other_bot = SimpleNamespace(id=55, display_name="Gemini", name="Gemini", bot=True) + human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False) + old_human = SimpleNamespace(id=57, display_name="Bob", name="Bob", bot=False) + + channel = FakeHistoryChannel( + [ + make_history_message(author=human, content="latest human note", msg_id=4), + make_history_message(author=other_bot, content="latest bot note", msg_id=3), + make_history_message(author=adapter._client.user, content="our prior response", msg_id=2), + make_history_message(author=old_human, content="older than boundary", msg_id=1), + ], + channel_id=123, + ) + + result = await adapter._fetch_channel_context(channel, before=make_message(channel=channel, content="trigger")) + + assert result == ( + "[Recent channel messages]\n" + "[Gemini [bot]] latest bot note\n" + "[Alice] latest human note" + ) + + +@pytest.mark.asyncio +async def test_fetch_channel_context_skips_other_bots_when_allow_bots_none(adapter, monkeypatch): + monkeypatch.setenv("DISCORD_ALLOW_BOTS", "none") + adapter.config.extra["history_backfill_limit"] = 10 + + other_bot = SimpleNamespace(id=55, display_name="Gemini", name="Gemini", bot=True) + human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False) + + channel = FakeHistoryChannel( + [ + make_history_message(author=human, content="human note", msg_id=3), + make_history_message(author=other_bot, content="bot note", msg_id=2), + ], + channel_id=123, + ) + + result = await adapter._fetch_channel_context(channel, before=make_message(channel=channel, content="trigger")) + + assert result == "[Recent channel messages]\n[Alice] human note" + + +@pytest.mark.asyncio +async def test_fetch_channel_context_uses_cache_to_narrow_window(adapter, 
monkeypatch): + """When _last_self_message_id is cached, the fetch passes after= to skip old messages.""" + monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all") + adapter.config.extra["history_backfill_limit"] = 50 + + human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False) + + # Record the after= arg passed to history() + recorded_after = {} + + class CacheTrackingChannel(FakeHistoryChannel): + def history(self, *, limit, before, after=None, oldest_first=None): + recorded_after["value"] = after + return super().history( + limit=limit, + before=before, + after=after, + oldest_first=oldest_first, + ) + + channel = CacheTrackingChannel( + [make_history_message(author=human, content="hello", msg_id=200)], + channel_id=777, + ) + + # Seed the cache โ€” bot's last message in this channel was ID 100 + adapter._last_self_message_id["777"] = "100" + + trigger = make_message(channel=channel, content="trigger") + trigger.id = 300 # trigger is newer than cache + + result = await adapter._fetch_channel_context(channel, before=trigger) + + assert result == "[Recent channel messages]\n[Alice] hello" + # Verify cache was used: after= should be set (not None) + assert recorded_after["value"] is not None + + +@pytest.mark.asyncio +async def test_fetch_channel_context_cache_uses_latest_window_when_after_set(adapter, monkeypatch): + """Regression: discord.py defaults oldest_first=True when after= is provided. + + The hot cache path passes both after= and before=. We still want the latest + messages before the trigger, not the earliest messages after our prior + response, otherwise tool traces can crowd out the final answer. 
+ """ + monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all") + adapter.config.extra["history_backfill_limit"] = 3 + + codex = SimpleNamespace(id=56, display_name="Codex", name="Codex", bot=True) + human = SimpleNamespace(id=57, display_name="Alice", name="Alice", bot=False) + + channel = FakeHistoryChannel( + [ + make_history_message(author=codex, content="old tool trace 1", msg_id=101), + make_history_message(author=codex, content="old tool trace 2", msg_id=102), + make_history_message(author=codex, content="old tool trace 3", msg_id=103), + make_history_message(author=codex, content="final analysis", msg_id=104), + make_history_message(author=human, content="latest follow-up", msg_id=105), + ], + channel_id=777, + ) + adapter._last_self_message_id["777"] = "100" + + trigger = make_message(channel=channel, content="trigger") + trigger.id = 200 + + result = await adapter._fetch_channel_context(channel, before=trigger) + + assert "[Codex [bot]] final analysis" in result + assert "[Alice] latest follow-up" in result + assert "old tool trace 1" not in result + assert "old tool trace 2" not in result + + +@pytest.mark.asyncio +async def test_fetch_channel_context_ignores_stale_cache(adapter, monkeypatch): + """If cached ID is >= trigger ID (stale/future), fall back to cold-start scan.""" + monkeypatch.setenv("DISCORD_ALLOW_BOTS", "all") + adapter.config.extra["history_backfill_limit"] = 50 + + human = SimpleNamespace(id=56, display_name="Alice", name="Alice", bot=False) + + recorded_after = {} + + class CacheTrackingChannel(FakeHistoryChannel): + def history(self, *, limit, before, after=None, oldest_first=None): + recorded_after["value"] = after + return super().history( + limit=limit, + before=before, + after=after, + oldest_first=oldest_first, + ) + + channel = CacheTrackingChannel( + [make_history_message(author=human, content="hello", msg_id=50)], + channel_id=777, + ) + + # Cache has a NEWER ID than the trigger โ€” stale/invalid + adapter._last_self_message_id["777"] 
= "500" + + trigger = make_message(channel=channel, content="trigger") + trigger.id = 300 + + result = await adapter._fetch_channel_context(channel, before=trigger) + + assert result == "[Recent channel messages]\n[Alice] hello" + # Cache should have been ignored โ€” after= should be None + assert recorded_after["value"] is None + + +@pytest.mark.asyncio +async def test_discord_shared_channel_backfill_prepends_context(adapter, monkeypatch): + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + monkeypatch.setenv("DISCORD_AUTO_THREAD", "false") + adapter.config.extra["group_sessions_per_user"] = False + adapter.config.extra["history_backfill"] = True + adapter._fetch_channel_context = AsyncMock(return_value="[Recent channel messages]\n[Alice] context") + + bot_user = adapter._client.user + message = make_message( + channel=FakeTextChannel(channel_id=321), + content=f"<@{bot_user.id}> hello with mention", + mentions=[bot_user], + ) + + await adapter._handle_message(message) + + adapter._fetch_channel_context.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.text == "hello with mention" + assert event.channel_context == "[Recent channel messages]\n[Alice] context" + + +@pytest.mark.asyncio +async def test_discord_per_user_channel_backfills_too(adapter, monkeypatch): + """Per-user sessions also benefit from backfill: Alice's session is missing + other-channel-participants' context and her own pre-mention messages.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true") + monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False) + monkeypatch.setenv("DISCORD_AUTO_THREAD", "false") + adapter.config.extra["group_sessions_per_user"] = True + adapter.config.extra["history_backfill"] = True + adapter._fetch_channel_context = AsyncMock(return_value="[Recent channel messages]\n[Alice] context") + + bot_user = adapter._client.user + message = 
make_message( + channel=FakeTextChannel(channel_id=321), + content=f"<@{bot_user.id}> hello with mention", + mentions=[bot_user], + ) + + await adapter._handle_message(message) + + adapter._fetch_channel_context.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.text == "hello with mention" + assert event.channel_context == "[Recent channel messages]\n[Alice] context" + + +@pytest.mark.asyncio +async def test_discord_dm_does_not_backfill(adapter, monkeypatch): + """DMs skip backfill โ€” every DM triggers the bot, so there's no mention gap.""" + monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true") + adapter.config.extra["history_backfill"] = True + adapter._fetch_channel_context = AsyncMock(return_value="[Recent channel messages]\n[Alice] context") + + bot_user = adapter._client.user + dm_channel = SimpleNamespace( + id=999, + name=None, + guild=None, + topic=None, + ) + # Make isinstance(channel, discord.DMChannel) return True + monkeypatch.setattr( + discord_platform.discord, "DMChannel", type(dm_channel), raising=False, + ) + + message = make_message( + channel=dm_channel, + content="hello in DM", + mentions=[], + ) + + await adapter._handle_message(message) + + adapter._fetch_channel_context.assert_not_awaited() + if adapter.handle_message.await_args is not None: + event = adapter.handle_message.await_args.args[0] + assert event.channel_context is None + + diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 57a8aefa5e81..b8fd45558cdc 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -5,6 +5,7 @@ from pathlib import Path from unittest.mock import patch, MagicMock from gateway.config import Platform, HomeChannel, GatewayConfig, PlatformConfig +from gateway.platforms.base import MessageEvent from gateway.session import ( SessionSource, SessionStore, @@ -430,6 +431,76 @@ def test_dm_thread_shows_user_not_multi(self): assert "Multi-user thread" not in prompt +class 
TestSenderPrefixWithBackfill: + """Regression: sender prefix must not wrap the backfill context block. + + Tests exercise the real GatewayRunner._prepare_inbound_message_text() + method to ensure the [sender_name] prefix applies only to the trigger + message, not the channel_context backfill block. + """ + + @pytest.fixture() + def runner(self): + from gateway.run import GatewayRunner + + r = GatewayRunner.__new__(GatewayRunner) + r.config = GatewayConfig(group_sessions_per_user=False) + r.adapters = {} + r._model = "test-model" + r._base_url = "" + r._has_setup_skill = lambda: False + return r + + @pytest.fixture() + def source(self): + return SessionSource( + platform=Platform.DISCORD, + chat_id="c1", + chat_type="group", + user_name="Alice", + ) + + @pytest.mark.asyncio + async def test_plain_message_gets_prefix(self, runner, source): + """Normal message without backfill gets [sender] prefix.""" + event = MessageEvent(text="hello world", source=source) + result = await runner._prepare_inbound_message_text( + event=event, source=source, history=[], + ) + assert result == "[Alice] hello world" + + @pytest.mark.asyncio + async def test_backfill_prefix_only_on_trigger(self, runner, source): + """Backfill context must NOT get the sender prefix.""" + event = MessageEvent( + text="hello world", + source=source, + channel_context="[Recent channel messages]\n[Bob] some context", + ) + result = await runner._prepare_inbound_message_text( + event=event, source=source, history=[], + ) + assert result.startswith("[Recent channel messages]") + assert "[Alice] [Recent channel messages]" not in result + assert "[New message]\n[Alice] hello world" in result + + @pytest.mark.asyncio + async def test_backfill_preserves_context_block(self, runner, source): + """The backfill block should pass through unchanged โ€” no double-prefixing.""" + context = "[Recent channel messages]\n[Bob] first\n[Charlie [bot]] second" + event = MessageEvent( + text="hey everyone", source=source, 
channel_context=context, + ) + result = await runner._prepare_inbound_message_text( + event=event, source=source, history=[], + ) + assert result.startswith(context) + assert "[Alice] hey everyone" in result + assert "[Alice] [Bob]" not in result + assert "[Alice] [Charlie" not in result + assert "[Alice] [Recent" not in result + + class TestSessionStoreRewriteTranscript: """Regression: /retry and /undo must persist truncated history to disk.""" diff --git a/tests/gateway/test_session_reset_notify.py b/tests/gateway/test_session_reset_notify.py index 87903921fbda..a4e9d71d0f8c 100644 --- a/tests/gateway/test_session_reset_notify.py +++ b/tests/gateway/test_session_reset_notify.py @@ -205,3 +205,78 @@ def test_to_dict_roundtrip(self): assert restored.notify == original.notify assert restored.notify_exclude_platforms == original.notify_exclude_platforms assert restored.mode == original.mode + + +# --------------------------------------------------------------------------- +# SessionEntry to_dict / from_dict roundtrip for auto-reset fields +# --------------------------------------------------------------------------- + +class TestSessionEntryAutoResetRoundtrip: + def test_was_auto_reset_persists_across_roundtrip(self, tmp_path): + """was_auto_reset=True survives to_dict() โ†’ from_dict() (gateway restart).""" + store = _make_store( + SessionResetPolicy(mode="idle", idle_minutes=1), + tmp_path, + ) + source = _make_source() + + entry = store.get_or_create_session(source) + entry.updated_at = datetime.now() - timedelta(minutes=5) + store._save() + + entry2 = store.get_or_create_session(source) + assert entry2.was_auto_reset is True + assert entry2.auto_reset_reason == "idle" + assert entry2.session_id != entry.session_id + + # Simulate gateway restart: reload from disk + store._loaded = False + store._entries.clear() + store._ensure_loaded() + + reloaded = store._entries.get(entry2.session_key) + assert reloaded is not None + assert reloaded.was_auto_reset is True + 
assert reloaded.auto_reset_reason == "idle" + + def test_reset_had_activity_persists_across_roundtrip(self, tmp_path): + """reset_had_activity survives to_dict() โ†’ from_dict() (gateway restart).""" + store = _make_store( + SessionResetPolicy(mode="idle", idle_minutes=1), + tmp_path, + ) + source = _make_source() + + entry = store.get_or_create_session(source) + entry.total_tokens = 1000 + entry.updated_at = datetime.now() - timedelta(minutes=5) + store._save() + + entry2 = store.get_or_create_session(source) + assert entry2.reset_had_activity is True + + store._loaded = False + store._entries.clear() + store._ensure_loaded() + + reloaded = store._entries.get(entry2.session_key) + assert reloaded is not None + assert reloaded.reset_had_activity is True + + def test_auto_reset_reason_none_roundtrip(self, tmp_path): + """auto_reset_reason=None (no reset) survives roundtrip cleanly.""" + store = _make_store(tmp_path=tmp_path) + source = _make_source() + + entry = store.get_or_create_session(source) + assert entry.was_auto_reset is False + + store._loaded = False + store._entries.clear() + store._ensure_loaded() + + reloaded = store._entries.get(entry.session_key) + assert reloaded is not None + assert reloaded.was_auto_reset is False + assert reloaded.auto_reset_reason is None + assert reloaded.reset_had_activity is False diff --git a/tests/gateway/test_simplex_plugin.py b/tests/gateway/test_simplex_plugin.py new file mode 100644 index 000000000000..0b1b1b21a85e --- /dev/null +++ b/tests/gateway/test_simplex_plugin.py @@ -0,0 +1,347 @@ +"""Tests for the SimpleX Chat platform-plugin adapter. + +Loaded via the ``_plugin_adapter_loader`` helper so this lives under +``plugin_adapter_simplex`` in ``sys.modules`` and cannot collide with +sibling platform-plugin tests on the same xdist worker. 
+""" + +from __future__ import annotations + +import json +import os +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from tests.gateway._plugin_adapter_loader import load_plugin_adapter + +_simplex = load_plugin_adapter("simplex") + +SimplexAdapter = _simplex.SimplexAdapter +check_requirements = _simplex.check_requirements +validate_config = _simplex.validate_config +is_connected = _simplex.is_connected +register = _simplex.register +_env_enablement = _simplex._env_enablement +_standalone_send = _simplex._standalone_send +_guess_extension = _simplex._guess_extension +_is_image_ext = _simplex._is_image_ext +_is_audio_ext = _simplex._is_audio_ext +_CORR_PREFIX = _simplex._CORR_PREFIX + + +# --------------------------------------------------------------------------- +# 1. Platform enum (plugin-discovered, not bundled) +# --------------------------------------------------------------------------- + +def test_platform_enum_resolves_via_plugin_scan(): + """The plugin filesystem scan should expose Platform("simplex").""" + from gateway.config import Platform + p = Platform("simplex") + assert p.value == "simplex" + # Identity stability โ€” repeated lookups return the same pseudo-member + assert Platform("simplex") is p + + +# --------------------------------------------------------------------------- +# 2. check_requirements / validate_config / is_connected +# --------------------------------------------------------------------------- + +def test_check_requirements_needs_url(monkeypatch): + monkeypatch.delenv("SIMPLEX_WS_URL", raising=False) + assert check_requirements() is False + + +def test_check_requirements_true_when_configured(monkeypatch): + monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225") + # websockets is a dev dep in this repo via the test plugins; the + # check_requirements() gate also asserts the package imports. 
+ websockets_present = True + try: + import websockets # noqa: F401 + except ImportError: + websockets_present = False + assert check_requirements() is websockets_present + + +def test_validate_config_uses_env_or_extra(): + from gateway.config import PlatformConfig + # Empty extra + no env โ†’ invalid + cfg = PlatformConfig(enabled=True) + assert validate_config(cfg) is False + # extra-only path โ†’ valid + cfg2 = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + assert validate_config(cfg2) is True + + +def test_is_connected_mirrors_validate(monkeypatch): + from gateway.config import PlatformConfig + monkeypatch.delenv("SIMPLEX_WS_URL", raising=False) + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://x"}) + assert is_connected(cfg) is True + assert is_connected(PlatformConfig(enabled=True)) is False + + +# --------------------------------------------------------------------------- +# 3. _env_enablement seeds PlatformConfig.extra +# --------------------------------------------------------------------------- + +def test_env_enablement_none_when_unset(monkeypatch): + monkeypatch.delenv("SIMPLEX_WS_URL", raising=False) + assert _env_enablement() is None + + +def test_env_enablement_seeds_ws_url(monkeypatch): + monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225") + monkeypatch.delenv("SIMPLEX_HOME_CHANNEL", raising=False) + seed = _env_enablement() + assert seed == {"ws_url": "ws://127.0.0.1:5225"} + + +def test_env_enablement_seeds_home_channel(monkeypatch): + monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225") + monkeypatch.setenv("SIMPLEX_HOME_CHANNEL", "42") + monkeypatch.setenv("SIMPLEX_HOME_CHANNEL_NAME", "Personal") + seed = _env_enablement() + assert seed["home_channel"] == {"chat_id": "42", "name": "Personal"} + + +def test_env_enablement_home_channel_defaults_name_to_id(monkeypatch): + monkeypatch.setenv("SIMPLEX_WS_URL", "ws://127.0.0.1:5225") + monkeypatch.setenv("SIMPLEX_HOME_CHANNEL", "42") + 
monkeypatch.delenv("SIMPLEX_HOME_CHANNEL_NAME", raising=False) + seed = _env_enablement() + assert seed["home_channel"] == {"chat_id": "42", "name": "42"} + + +# --------------------------------------------------------------------------- +# 4. Adapter init +# --------------------------------------------------------------------------- + +def test_adapter_init_custom_url(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + assert adapter.ws_url == "ws://localhost:5225" + assert adapter._running is False + assert adapter._ws is None + + +def test_adapter_init_default_url(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True) + adapter = SimplexAdapter(cfg) + assert adapter.ws_url == "ws://127.0.0.1:5225" + + +def test_adapter_platform_identity(): + """Adapter should expose Platform("simplex") identity.""" + from gateway.config import Platform, PlatformConfig + cfg = PlatformConfig(enabled=True) + adapter = SimplexAdapter(cfg) + assert adapter.platform is Platform("simplex") + + +# --------------------------------------------------------------------------- +# 5. 
Helper functions (magic-byte detection) +# --------------------------------------------------------------------------- + +def test_guess_extension_png(): + assert _guess_extension(b"\x89PNG\r\n\x1a\n") == ".png" + + +def test_guess_extension_jpg(): + assert _guess_extension(b"\xff\xd8\xff\xe0") == ".jpg" + + +def test_guess_extension_ogg(): + assert _guess_extension(b"OggS\x00\x02") == ".ogg" + + +def test_guess_extension_unknown(): + assert _guess_extension(b"\x00\x01\x02\x03") == ".bin" + + +def test_is_image_ext(): + assert _is_image_ext(".png") is True + assert _is_image_ext(".webp") is True + assert _is_image_ext(".ogg") is False + + +def test_is_audio_ext(): + assert _is_audio_ext(".ogg") is True + assert _is_audio_ext(".mp3") is True + assert _is_audio_ext(".pdf") is False + + +# --------------------------------------------------------------------------- +# 6. Correlation IDs +# --------------------------------------------------------------------------- + +def test_corr_id_starts_with_prefix_and_tracks_pending(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + corr_id = adapter._make_corr_id() + assert corr_id.startswith(_CORR_PREFIX) + assert corr_id in adapter._pending_corr_ids + + +def test_corr_id_pending_set_self_trims(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + adapter._max_pending_corr = 4 + for _ in range(10): + adapter._make_corr_id() + # After many additions, the pending set should be bounded by the trim + # logic โ€” at most one trim window above the cap. + assert len(adapter._pending_corr_ids) <= adapter._max_pending_corr + 1 + + +# --------------------------------------------------------------------------- +# 7. 
Outbound send (mocked WS) +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_send_dm(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + + mock_ws = AsyncMock() + adapter._ws = mock_ws + + result = await adapter.send("contact-42", "Hello, SimpleX!") + mock_ws.send.assert_called_once() + payload = json.loads(mock_ws.send.call_args[0][0]) + assert payload["cmd"] == "@[contact-42] Hello, SimpleX!" + assert payload["corrId"].startswith(_CORR_PREFIX) + assert result.success is True + + +@pytest.mark.asyncio +async def test_send_group(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + + mock_ws = AsyncMock() + adapter._ws = mock_ws + + result = await adapter.send("group:grp-99", "Hello, group!") + payload = json.loads(mock_ws.send.call_args[0][0]) + assert payload["cmd"] == "#[grp-99] Hello, group!" + assert result.success is True + + +@pytest.mark.asyncio +async def test_send_when_ws_not_connected_does_not_crash(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + # No _ws assigned โ€” _send_ws should drop quietly + result = await adapter.send("contact-42", "hi") + assert result.success is True # send() always returns success โ€” fire-and-forget + + +# --------------------------------------------------------------------------- +# 8. 
Inbound: filter own-echo by corrId prefix +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_handle_event_filters_own_corr_id(): + from gateway.config import PlatformConfig + cfg = PlatformConfig(enabled=True, extra={"ws_url": "ws://localhost:5225"}) + adapter = SimplexAdapter(cfg) + # Pretend we sent a command with this corrId + own = adapter._make_corr_id() + handler_mock = AsyncMock() + adapter._handle_new_chat_item = handler_mock # type: ignore + + await adapter._handle_event({"corrId": own, "type": "newChatItem"}) + handler_mock.assert_not_called() + assert own not in adapter._pending_corr_ids # discarded + + +# --------------------------------------------------------------------------- +# 9. Standalone (out-of-process) send for cron +# --------------------------------------------------------------------------- + +@pytest.mark.asyncio +async def test_standalone_send_missing_websockets(monkeypatch): + """When websockets is unimportable, return a clean error dict. + + Implementation detail: the standalone path does ``import websockets`` + inside the function body. We simulate the package being absent by + pulling it out of ``sys.modules`` and pointing the finder at None. 
+ """ + import sys + saved_websockets = sys.modules.pop("websockets", None) + saved_meta = list(sys.meta_path) + + class _Blocker: + @staticmethod + def find_spec(name, path=None, target=None): + if name == "websockets" or name.startswith("websockets."): + raise ImportError("websockets blocked for test") + return None + + sys.meta_path.insert(0, _Blocker()) + try: + pconfig = MagicMock() + pconfig.extra = {"ws_url": "ws://localhost:5225"} + result = await _standalone_send(pconfig, "contact-42", "hi") + assert isinstance(result, dict) + assert "error" in result + assert "websockets" in result["error"] + finally: + sys.meta_path[:] = saved_meta + if saved_websockets is not None: + sys.modules["websockets"] = saved_websockets + + +@pytest.mark.asyncio +async def test_standalone_send_missing_url(monkeypatch): + monkeypatch.delenv("SIMPLEX_WS_URL", raising=False) + pconfig = MagicMock() + pconfig.extra = {} + # We expect the URL fallback (extra+env both empty) to be empty string, + # producing an error. We also need websockets to be importable for the + # url-check branch to be reached, so skip when it's not. + try: + import websockets.client # noqa: F401 + except ImportError: + pytest.skip("websockets not installed") + + result = await _standalone_send(pconfig, "contact-42", "hi") + assert isinstance(result, dict) + # Either error about URL or a connection attempt failure โ€” both are valid + # signals that the standalone path requires configuration. + assert "error" in result + + +# --------------------------------------------------------------------------- +# 10. 
register() โ€” plugin-side metadata +# --------------------------------------------------------------------------- + +def test_register_calls_register_platform(): + ctx = MagicMock() + register(ctx) + ctx.register_platform.assert_called_once() + kwargs = ctx.register_platform.call_args.kwargs + assert kwargs["name"] == "simplex" + assert kwargs["label"] == "SimpleX Chat" + assert kwargs["required_env"] == ["SIMPLEX_WS_URL"] + assert kwargs["allowed_users_env"] == "SIMPLEX_ALLOWED_USERS" + assert kwargs["allow_all_env"] == "SIMPLEX_ALLOW_ALL_USERS" + assert kwargs["cron_deliver_env_var"] == "SIMPLEX_HOME_CHANNEL" + assert callable(kwargs["check_fn"]) + assert callable(kwargs["validate_config"]) + assert callable(kwargs["is_connected"]) + assert callable(kwargs["env_enablement_fn"]) + assert callable(kwargs["standalone_sender_fn"]) + assert callable(kwargs["adapter_factory"]) + assert callable(kwargs["setup_fn"]) + # SimpleX uses opaque IDs only โ€” no PII to redact. + assert kwargs["pii_safe"] is True diff --git a/tests/gateway/test_tlon_adapter.py b/tests/gateway/test_tlon_adapter.py new file mode 100644 index 000000000000..fb0c49cbaeb9 --- /dev/null +++ b/tests/gateway/test_tlon_adapter.py @@ -0,0 +1,379 @@ +from datetime import datetime +from unittest.mock import AsyncMock + +import pytest + +from gateway.config import PlatformConfig +from gateway.platforms.tlon import ( + TlonAdapter, + _extract_message_text, + _text_to_story, +) +from gateway.platforms.tlon_approval import create_pending_approval, format_pending_list +from gateway.platforms.tlon_discovery import parse_groups_ui_init +from gateway.platforms.tlon_media import ( + TlonDownloadedAttachment, + format_blob_annotations, + parse_blob_data, +) +from gateway.platforms.tlon_settings import parse_settings_response + + +def test_text_to_story_handles_mentions_links_code_and_images(): + story = _text_to_story( + "hi ~zod\nlink https://example.com\n\n" + "```python\nprint(1)\n```\n" + 
"![alt](https://example.com/a.png)" + ) + + assert story[0] == { + "inline": [ + "hi ", + {"ship": "~zod"}, + {"break": None}, + "link ", + { + "link": { + "href": "https://example.com", + "content": "https://example.com", + } + }, + ] + } + assert story[1] == { + "block": { + "code": { + "code": "print(1)", + "lang": "python", + } + } + } + assert story[2] == { + "block": { + "image": { + "src": "https://example.com/a.png", + "alt": "alt", + "width": 0, + "height": 0, + } + } + } + + +def test_extract_message_text_preserves_inline_spacing_and_blocks(): + content = [ + { + "inline": [ + "hi ", + {"bold": ["there"]}, + {"break": None}, + {"link": {"href": "https://example.com", "content": "site"}}, + " ", + {"ship": "~zod"}, + ] + }, + {"block": {"code": {"lang": "python", "code": "print(1)"}}}, + ] + + assert _extract_message_text(content) == ( + "hi there\nsite ~zod\n```python\nprint(1)\n```" + ) + + +def test_parse_blob_data_formats_supported_entries(): + entries = parse_blob_data( + """ + [ + {"type":"file","fileUri":"https://example.com/report.pdf","mimeType":"application/pdf","name":"report.pdf","size":2048}, + {"type":"voicememo","fileUri":"https://example.com/memo.m4a","duration":4.2,"transcription":"hello"}, + {"type":"video","fileUri":"https://example.com/clip.mp4","mimeType":"video/mp4","name":"clip.mp4"}, + {"type":"unknown","fileUri":"https://example.com/nope"} + ] + """ + ) + + assert [entry.type for entry in entries] == ["file", "voicememo", "video"] + annotation = format_blob_annotations(entries) + assert "[file: report.pdf (application/pdf, 2KB)] https://example.com/report.pdf" in annotation + assert "[voice memo: 4s] https://example.com/memo.m4a" in annotation + assert 'Transcription: "hello"' in annotation + assert "[video: clip.mp4 (video/mp4, unknown size)] https://example.com/clip.mp4" in annotation + + +def test_parse_groups_ui_init_discovers_channels_and_names(): + parsed = parse_groups_ui_init({ + "groups": { + "~host/test": { + "meta": 
{"title": "Test Group"}, + "channels": { + "chat/~host/general": {}, + "heap/~host/images": {}, + "diary/~host/blog": {}, + "bad-channel": {}, + }, + } + }, + "foreigns": {"~else/group": {"invites": [{"valid": True}]}}, + }) + + assert parsed.channels == { + "chat/~host/general", + "heap/~host/images", + "diary/~host/blog", + } + assert parsed.channel_to_group["chat/~host/general"] == "~host/test" + assert parsed.group_names["~host/test"] == "Test Group" + assert "~else/group" in parsed.foreigns + + +def test_parse_settings_response_reads_tlon_bucket(): + settings = parse_settings_response({ + "all": { + "moltbot": { + "tlon": { + "groupChannels": ["chat/~host/general"], + "dmAllowlist": ["~zod"], + "autoDiscover": True, + "channelRules": '{"chat/~host/general":{"mode":"restricted","allowedShips":["~nec"]}}', + "defaultAuthorizedShips": ["~bus"], + "ownerShip": "~ten", + } + } + } + }) + + assert settings.group_channels == ["chat/~host/general"] + assert settings.dm_allowlist == ["~zod"] + assert settings.auto_discover is True + assert settings.channel_rules["chat/~host/general"]["allowedShips"] == ["~nec"] + assert settings.default_authorized_ships == ["~bus"] + assert settings.owner_ship == "~ten" + + +def test_approval_formatting_lists_pending_request(): + approval = create_pending_approval( + approval_type="dm", + requesting_ship="~zod", + existing_ids=[], + message_preview="hello", + ) + + pending = format_pending_list([approval]) + assert approval.id in pending + assert "~zod" in pending + + +@pytest.mark.asyncio +async def test_channel_event_routes_top_level_mentions(monkeypatch): + monkeypatch.setenv("TLON_SHIP_NAME", "~bot-palnet") + adapter = TlonAdapter(PlatformConfig()) + adapter.monitored_channels = {"chat/~host/test"} + adapter.handle_message = AsyncMock() + + await adapter._handle_channel_event({ + "nest": "chat/~host/test", + "response": { + "post": { + "id": "170141184507864167403996323545639550976", + "r-post": { + "set": { + "seal": {"id": 
"170141184507864167403996323545639550976"}, + "essay": { + "author": "~zod", + "sent": 1_700_000_000_000, + "content": [ + {"inline": [{"ship": "~bot-palnet"}, " hello"]} + ], + }, + } + }, + } + }, + }) + + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.text == "hello" + assert event.message_id == "170141184507864167403996323545639550976" + assert event.reply_to_message_id is None + assert event.source.chat_id == "chat/~host/test" + assert event.source.user_id == "~zod" + assert isinstance(event.timestamp, datetime) + + +@pytest.mark.asyncio +async def test_channel_event_routes_thread_reply_to_parent(monkeypatch): + monkeypatch.setenv("TLON_SHIP_NAME", "~bot-palnet") + adapter = TlonAdapter(PlatformConfig()) + adapter.monitored_channels = {"chat/~host/test"} + adapter.handle_message = AsyncMock() + + await adapter._handle_channel_event({ + "nest": "chat/~host/test", + "response": { + "post": { + "id": "parent-post", + "r-post": { + "reply": { + "id": "reply-post", + "r-reply": { + "set": { + "seal": {"parent-id": "parent-post"}, + "memo": { + "author": "~zod", + "sent": 1_700_000_000_000, + "content": [ + {"inline": ["hey ", {"ship": "~bot-palnet"}]} + ], + }, + } + }, + } + }, + } + }, + }) + + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.text == "hey" + assert event.message_id == "reply-post" + assert event.reply_to_message_id == "parent-post" + assert event.source.thread_id == "parent-post" + + +@pytest.mark.asyncio +async def test_channel_event_routes_blob_only_owner_message(monkeypatch): + monkeypatch.setenv("TLON_SHIP_NAME", "~bot-palnet") + monkeypatch.setenv("TLON_OWNER_SHIP", "~zod") + adapter = TlonAdapter(PlatformConfig()) + adapter.monitored_channels = {"chat/~host/test"} + adapter.handle_message = AsyncMock() + + async def fake_download_blob_attachments(entries): + return [TlonDownloadedAttachment("/tmp/report.pdf", 
"application/pdf")], [] + + monkeypatch.setattr( + "gateway.platforms.tlon.download_blob_attachments", + fake_download_blob_attachments, + ) + + await adapter._handle_channel_event({ + "nest": "chat/~host/test", + "response": { + "post": { + "id": "blob-post", + "r-post": { + "set": { + "seal": {"id": "blob-post"}, + "essay": { + "author": "~zod", + "sent": 1_700_000_000_000, + "content": [], + "blob": '[{"type":"file","fileUri":"https://example.com/report.pdf","mimeType":"application/pdf","name":"report.pdf","size":2048}]', + }, + } + }, + } + }, + }) + + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert "[file: report.pdf" in event.text + assert event.media_urls == ["/tmp/report.pdf"] + assert event.media_types == ["application/pdf"] + assert event.message_type.value == "document" + + +@pytest.mark.asyncio +async def test_dm_event_uses_partner_for_routing_and_skips_own_messages(monkeypatch): + monkeypatch.setenv("TLON_SHIP_NAME", "~bot-palnet") + monkeypatch.delenv("TLON_OWNER_SHIP", raising=False) + monkeypatch.delenv("TLON_ALLOWED_USERS", raising=False) + monkeypatch.delenv("TLON_DM_ALLOWLIST", raising=False) + monkeypatch.delenv("TLON_DEFAULT_AUTHORIZED_SHIPS", raising=False) + monkeypatch.delenv("GATEWAY_ALLOWED_USERS", raising=False) + monkeypatch.delenv("GATEWAY_ALLOW_ALL_USERS", raising=False) + adapter = TlonAdapter(PlatformConfig()) + adapter.handle_message = AsyncMock() + + await adapter._handle_dm_event({ + "whom": "~zod", + "id": "~zod/170.141", + "response": { + "add": { + "essay": { + "author": "~zod", + "sent": 1_700_000_000_000, + "content": [{"inline": ["hello"]}], + } + } + }, + }) + + adapter.handle_message.assert_awaited_once() + event = adapter.handle_message.await_args.args[0] + assert event.text == "hello" + assert event.source.chat_id == "~zod" + assert event.source.chat_type == "dm" + + adapter.handle_message.reset_mock() + await adapter._handle_dm_event({ + "whom": "~zod", + "id": 
"~bot-palnet/170.142", + "response": { + "add": { + "essay": { + "author": "~bot-palnet", + "sent": 1_700_000_000_001, + "content": [{"inline": ["own message"]}], + } + } + }, + }) + + adapter.handle_message.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_dm_invite_list_accepts_string_and_object_entries(monkeypatch): + monkeypatch.setenv("TLON_SHIP_NAME", "~bot-palnet") + monkeypatch.setenv("TLON_OWNER_SHIP", "~zod") + monkeypatch.setenv("TLON_ALLOWED_USERS", "~nec") + adapter = TlonAdapter(PlatformConfig()) + adapter._sse = AsyncMock() + + await adapter._handle_dm_event(["~zod", {"ship": "~nec"}, 42, {"other": "~bud"}]) + + assert adapter._sse.poke.await_count == 2 + ships = [call.kwargs["json_data"]["ship"] for call in adapter._sse.poke.await_args_list] + assert ships == ["zod", "nec"] + + +@pytest.mark.asyncio +async def test_unauthorized_dm_queues_owner_approval(monkeypatch): + monkeypatch.setenv("TLON_SHIP_NAME", "~bot-palnet") + monkeypatch.setenv("TLON_OWNER_SHIP", "~ten") + adapter = TlonAdapter(PlatformConfig()) + adapter.handle_message = AsyncMock() + adapter.send = AsyncMock() + + await adapter._handle_dm_event({ + "whom": "~zod", + "id": "~zod/170.141", + "response": { + "add": { + "essay": { + "author": "~zod", + "sent": 1_700_000_000_000, + "content": [{"inline": ["hello"]}], + } + } + }, + }) + + adapter.handle_message.assert_not_awaited() + assert len(adapter.pending_approvals) == 1 + assert adapter.pending_approvals[0].requesting_ship == "~zod" + adapter.send.assert_awaited_once() diff --git a/tests/hermes_cli/test_codex_runtime_plugin_migration.py b/tests/hermes_cli/test_codex_runtime_plugin_migration.py index b2e27f8c97bc..ebdc9f9ae6b6 100644 --- a/tests/hermes_cli/test_codex_runtime_plugin_migration.py +++ b/tests/hermes_cli/test_codex_runtime_plugin_migration.py @@ -8,9 +8,13 @@ from hermes_cli.codex_runtime_plugin_migration import ( MIGRATION_MARKER, + MIGRATION_END_MARKER, MigrationReport, + _build_hermes_tools_mcp_entry, 
_format_toml_value, + _looks_like_test_tempdir, _strip_existing_managed_block, + _strip_unmanaged_plugin_tables, _translate_one_server, migrate, render_codex_toml_section, @@ -567,10 +571,31 @@ def test_preserves_user_codex_config_above_marker(self, tmp_path): assert "[model]" in new_text assert 'profile = "default"' in new_text assert "[providers.openai]" in new_text - # And new MCP block appended + # And new MCP block inserted without breaking user tables assert "[mcp_servers.a]" in new_text assert MIGRATION_MARKER in new_text + def test_managed_root_keys_stay_top_level_when_config_ends_in_table(self, tmp_path): + """TOML has no explicit 'leave current table' syntax. If Hermes appends + root keys like default_permissions after a user table such as [features], + Codex parses them as features.default_permissions and rejects the config. + The managed block must therefore be inserted before the first table.""" + import tomllib + + target = tmp_path / "config.toml" + target.write_text( + 'model = "gpt-5.5"\n' + "\n" + "[features]\n" + "terminal_resize_reflow = true\n" + ) + migrate({}, codex_home=tmp_path, discover_plugins=False, expose_hermes_tools=False) + new_text = target.read_text() + parsed = tomllib.loads(new_text) + assert parsed["default_permissions"] == ":workspace" + assert "default_permissions" not in parsed["features"] + assert new_text.index(MIGRATION_MARKER) < new_text.index("[features]") + def test_preserves_user_mcp_server_outside_managed_block(self, tmp_path): """Quirk #6: when a user adds their own MCP server entry directly to ~/.codex/config.toml outside Hermes' managed block, re-running @@ -635,3 +660,206 @@ def test_summary_reports_migration_count(self, tmp_path): assert "Migrated 2 MCP server(s)" in summary assert "- a" in summary assert "- b" in summary + + +# ---- Bug B: duplicate [plugins.X] tables ---- + + +class TestStripUnmanagedPluginTables: + """Regression tests for issue #26250 Bug B. 
+ + When codex itself writes ``[plugins."@"]`` tables + (via the user running ``codex plugins enable`` directly), re-running + ``hermes codex-runtime migrate`` would re-emit them inside the managed + block and the resulting duplicate-table-header would crash codex. + """ + + def test_strips_plugin_tables_outside_managed_block(self): + text = ( + 'model = "gpt-5.5"\n' + "\n" + "[mcp_servers.user-thing]\n" + 'command = "x"\n' + "\n" + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + "\n" + '[plugins."web-search@openai-curated"]\n' + "enabled = true\n" + "\n" + "[features]\n" + "terminal_resize_reflow = true\n" + ) + stripped = _strip_unmanaged_plugin_tables(text) + assert "[plugins." not in stripped + # Non-plugin content preserved + assert "[mcp_servers.user-thing]" in stripped + assert "[features]" in stripped + assert "terminal_resize_reflow = true" in stripped + + def test_preserves_content_when_no_plugin_tables(self): + text = ( + 'model = "gpt-5.5"\n' + "\n" + "[mcp_servers.x]\n" + 'command = "y"\n' + ) + assert _strip_unmanaged_plugin_tables(text) == text + + def test_multi_line_array_in_plugin_table_does_not_leak(self): + """A multi-line TOML array inside a [plugins.X] table whose + continuation lines start with ``[`` (e.g. nested arrays) must NOT + prematurely exit the strip region โ€” otherwise array fragments + leak into top-level output and produce invalid TOML on the next + codex startup. Regression guard for #26260 review. + """ + text = ( + '[plugins."tasks@openai-curated"]\n' + "allowed = [\n" + ' "a",\n' + ' ["nested"],\n' + "]\n" + "[features]\n" + "x = 1\n" + ) + stripped = _strip_unmanaged_plugin_tables(text) + # Everything inside the plugin table โ€” including the multi-line + # array's continuation lines starting with `[` โ€” should be gone. + assert '["nested"]' not in stripped + assert "allowed" not in stripped + # Sibling user table survives intact. 
+ assert "[features]" in stripped + assert "x = 1" in stripped + # Result is still valid TOML. + import tomllib + tomllib.loads(stripped) + + def test_migrate_dedups_codex_owned_plugin_tables(self, tmp_path, monkeypatch): + """End-to-end: codex's pre-existing [plugins.X] tables get replaced by + the managed block's re-emission rather than duplicated.""" + target = tmp_path / "config.toml" + target.write_text( + "[mcp_servers.user-server]\n" + 'command = "x"\n' + "\n" + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + ) + + # Simulate codex's plugin/list reporting the same plugin tasks@openai-curated. + def fake_query(codex_home=None, timeout=8.0): + return ( + [{"name": "tasks", "marketplace": "openai-curated", "enabled": True}], + None, + ) + + monkeypatch.setattr( + "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins", + fake_query, + ) + migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False) + new_text = target.read_text() + # Only ONE [plugins."tasks@openai-curated"] header should remain โ€” inside + # the managed block โ€” not the original outside-the-block copy. + assert new_text.count('[plugins."tasks@openai-curated"]') == 1 + # And the surviving one is inside our managed section. + managed_start = new_text.index(MIGRATION_MARKER) + managed_end = new_text.index(MIGRATION_END_MARKER) + plugin_idx = new_text.index('[plugins."tasks@openai-curated"]') + assert managed_start < plugin_idx < managed_end + # File parses cleanly as TOML (the original duplicate-key error is gone). 
+ import tomllib + tomllib.loads(new_text) + + def test_migrate_preserves_plugin_tables_when_plugin_list_fails(self, tmp_path, monkeypatch): + """If plugin/list RPC fails, we can't re-emit plugins authoritatively, + so we must NOT strip the user's existing [plugins.X] tables โ€” that + would silently lose them.""" + target = tmp_path / "config.toml" + target.write_text( + '[plugins."tasks@openai-curated"]\n' + "enabled = true\n" + ) + + def fake_query(codex_home=None, timeout=8.0): + return ([], "plugin/list query failed: codex not installed") + + monkeypatch.setattr( + "hermes_cli.codex_runtime_plugin_migration._query_codex_plugins", + fake_query, + ) + migrate({}, codex_home=tmp_path, discover_plugins=True, expose_hermes_tools=False) + new_text = target.read_text() + # User's plugin table preserved verbatim โ€” we can't re-emit it. + assert '[plugins."tasks@openai-curated"]' in new_text + + +# ---- Bug C: HERMES_HOME tempdir leak into ~/.codex/config.toml ---- + + +class TestHermesHomeLeakGuard: + """Regression tests for issue #26250 Bug C. + + Previously ``_build_hermes_tools_mcp_entry()`` read ``HERMES_HOME`` + directly from ``os.environ``, so a pytest ``monkeypatch.setenv`` would + leak a transient tempdir path into the user's real ``~/.codex/config.toml`` + once codex spawned the hermes-tools MCP subprocess. 
+ """ + + def test_tempdir_detector_recognizes_pytest_paths(self): + assert _looks_like_test_tempdir( + "/private/var/folders/abc/pytest-of-kshitij/pytest-137/popen-gw2/test_X/hermes_test" + ) + assert _looks_like_test_tempdir( + "/tmp/pytest-of-user/pytest-12/test_X/hermes" + ) + assert _looks_like_test_tempdir( + "/private/var/folders/zz/T/pytest-of-bob/pytest-1" + ) + + def test_tempdir_detector_accepts_real_hermes_home(self): + assert not _looks_like_test_tempdir("/Users/alice/.hermes") + assert not _looks_like_test_tempdir("/home/bob/.hermes") + assert not _looks_like_test_tempdir("/opt/hermes") + assert not _looks_like_test_tempdir("") + + def test_pytest_tempdir_not_burned_into_mcp_env(self, monkeypatch): + """The headline regression: even when HERMES_HOME points at a pytest + tempdir, _build_hermes_tools_mcp_entry() must NOT propagate it.""" + monkeypatch.setenv( + "HERMES_HOME", + "/private/var/folders/xx/pytest-of-user/pytest-99/test_x/hermes_test", + ) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert "HERMES_HOME" not in env, ( + f"pytest-tempdir HERMES_HOME leaked into codex MCP entry: " + f"{env.get('HERMES_HOME')!r}" + ) + + def test_real_hermes_home_propagates(self, monkeypatch, tmp_path): + """A legitimate HERMES_HOME (not a tempdir path) DOES propagate so the + MCP subprocess sees the same config as the parent CLI.""" + # Use a path that looks real โ€” under /Users or /home, not /var/folders. + # We can't easily create one in the test, so just use a stable path + # outside any tempdir-detector needle. The detector checks for tempdir + # markers, not for path existence. 
+ real_path = "/Users/alice/.hermes" + monkeypatch.setenv("HERMES_HOME", real_path) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert env.get("HERMES_HOME") == real_path + + def test_unset_hermes_home_omits_env_key(self, monkeypatch): + """When HERMES_HOME is unset in the environment, the MCP entry MUST + NOT bake in a resolved-default path. The codex subprocess should + inherit whatever HERMES_HOME its launcher (systemd, gateway, shell) + sets at runtime, rather than being pinned to migrate-time defaults. + Regression guard for issue #26250 follow-up review.""" + monkeypatch.delenv("HERMES_HOME", raising=False) + entry = _build_hermes_tools_mcp_entry() + env = entry.get("env", {}) + assert "HERMES_HOME" not in env, ( + f"HERMES_HOME should not be set when env var is unset, got: " + f"{env.get('HERMES_HOME')!r}" + ) diff --git a/tests/hermes_cli/test_codex_runtime_switch.py b/tests/hermes_cli/test_codex_runtime_switch.py index 9a01543776ed..7bf1a59e1e72 100644 --- a/tests/hermes_cli/test_codex_runtime_switch.py +++ b/tests/hermes_cli/test_codex_runtime_switch.py @@ -114,8 +114,15 @@ def test_enable_succeeds_when_codex_present(self): def persist(c): persisted.update(c) + # Patch migrate so this test doesn't reach into the user's real + # ~/.codex/config.toml. See issue #26250 Bug C โ€” without this patch, + # crs.apply() invokes the real migrate() which writes to + # Path.home() / ".codex" using whatever HERMES_HOME the running pytest + # session has set, leaking pytest tempdir paths into the user's + # codex config. 
with patch.object(crs, "check_codex_binary_ok", - return_value=(True, "0.130.0")): + return_value=(True, "0.130.0")), \ + patch("hermes_cli.codex_runtime_plugin_migration.migrate"): r = crs.apply(cfg, "codex_app_server", persist_callback=persist) assert r.success assert r.new_value == "codex_app_server" diff --git a/tests/hermes_cli/test_set_config_value.py b/tests/hermes_cli/test_set_config_value.py index 617a915e3225..39faa83cf58e 100644 --- a/tests/hermes_cli/test_set_config_value.py +++ b/tests/hermes_cli/test_set_config_value.py @@ -39,8 +39,6 @@ class TestExplicitAllowlist: "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", - "WANDB_API_KEY", - "TINKER_API_KEY", "HONCHO_API_KEY", "FIRECRAWL_API_KEY", "BROWSERBASE_API_KEY", diff --git a/tests/hermes_cli/test_setup_hermes_script.py b/tests/hermes_cli/test_setup_hermes_script.py index 7978e660a897..a4eb5ccb7d02 100644 --- a/tests/hermes_cli/test_setup_hermes_script.py +++ b/tests/hermes_cli/test_setup_hermes_script.py @@ -18,4 +18,3 @@ def test_setup_hermes_script_has_termux_path(): assert ".[termux]" in content assert "constraints-termux.txt" in content assert "$PREFIX/bin" in content - assert "Skipping tinker-atropos on Termux" in content diff --git a/tests/hermes_cli/test_update_autostash.py b/tests/hermes_cli/test_update_autostash.py index 645b3b24ea4c..f7d90245a810 100644 --- a/tests/hermes_cli/test_update_autostash.py +++ b/tests/hermes_cli/test_update_autostash.py @@ -305,6 +305,7 @@ def _setup_update_mocks(monkeypatch, tmp_path): monkeypatch.setattr(hermes_config, "get_missing_config_fields", lambda: []) monkeypatch.setattr(hermes_config, "check_config_version", lambda: (5, 5)) monkeypatch.setattr(hermes_config, "migrate_config", lambda **kw: {"env_added": [], "config_added": []}) + monkeypatch.setattr(hermes_main, "_refresh_active_lazy_features", lambda: None) def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypatch, tmp_path, capsys): diff --git 
a/tests/plugins/test_langfuse_plugin.py b/tests/plugins/test_langfuse_plugin.py index 6d9fcce38ee4..313d2e94a72f 100644 --- a/tests/plugins/test_langfuse_plugin.py +++ b/tests/plugins/test_langfuse_plugin.py @@ -2,6 +2,7 @@ from __future__ import annotations import importlib +import logging import sys from pathlib import Path @@ -164,7 +165,542 @@ def test_hooks_noop_without_client(self, monkeypatch): # Each hook should just return; no exceptions. mod.on_pre_llm_call(task_id="t", session_id="s", messages=[{"role": "user", "content": "hi"}]) - mod.on_pre_llm_request(task_id="t", session_id="s", api_call_count=1, messages=[]) + mod.on_pre_llm_request(task_id="t", session_id="s", api_call_count=1, request_messages=[]) mod.on_post_llm_call(task_id="t", session_id="s", api_call_count=1) mod.on_pre_tool_call(tool_name="read_file", args={}, task_id="t", session_id="s") mod.on_post_tool_call(tool_name="read_file", args={}, result="ok", task_id="t", session_id="s") + + +# --------------------------------------------------------------------------- +# Placeholder-credential guard (#23823). +# +# Regression coverage for the silent-failure bug: when an operator leaves +# HERMES_LANGFUSE_PUBLIC_KEY / SECRET_KEY at a template value like +# "placeholder", "test-key", or "your-langfuse-key", the SDK accepts the +# credentials at construction time (it does no server-side validation +# eagerly) but drops every trace at flush time, with no signal in the +# Hermes logs. The fix in `_get_langfuse()` validates the documented +# `pk-lf-` / `sk-lf-` prefix Langfuse always issues, surfaces a one-shot +# warning naming the offending env var(s), and short-circuits via the +# same `_INIT_FAILED` path used for missing credentials so subsequent +# hook invocations don't re-log. +# --------------------------------------------------------------------------- + + +class _FakeLangfuse: + """Stand-in for the real :class:`langfuse.Langfuse` so tests don't + need the optional ``langfuse`` SDK installed. 
The plugin's runtime + gate refuses to proceed past ``if Langfuse is None`` when the SDK + is missing, which would short-circuit before the placeholder check + can fire. Patching ``plugin.Langfuse`` with this class lets the + placeholder validator exercise its full code path.""" + + instances: list["_FakeLangfuse"] = [] + + def __init__(self, **kwargs): + self.kwargs = kwargs + _FakeLangfuse.instances.append(self) + + +class TestPlaceholderKeyDetection: + LOGGER_NAME = "plugins.observability.langfuse" + + def _fresh_plugin(self, monkeypatch=None): + mod_name = "plugins.observability.langfuse" + sys.modules.pop(mod_name, None) + mod = importlib.import_module(mod_name) + if monkeypatch is not None: + # Pretend the SDK is installed so `_get_langfuse()` actually + # reaches the placeholder check. Real SDK calls are never + # made because the placeholder/missing-credentials paths + # return before constructing a client. + _FakeLangfuse.instances.clear() + monkeypatch.setattr(mod, "Langfuse", _FakeLangfuse, raising=False) + return mod + + @staticmethod + def _clear_env(monkeypatch): + for k in ( + "HERMES_LANGFUSE_PUBLIC_KEY", "HERMES_LANGFUSE_SECRET_KEY", + "LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", + ): + monkeypatch.delenv(k, raising=False) + + # -- helper unit tests (no SDK stub needed: these don't go through + # _get_langfuse, they exercise the pure-Python helpers directly) ------ + + def test_redact_key_preview_empty(self, monkeypatch): + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._redact_key_preview("") == "" + + def test_redact_key_preview_short_value_echoed(self, monkeypatch): + """Short placeholder strings are echoed in full so the operator + can see exactly which template they forgot to replace.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._redact_key_preview("placeholder") == "'placeholder'" + assert plugin._redact_key_preview("test-key") == "'test-key'" + + def 
test_redact_key_preview_long_value_truncated(self, monkeypatch): + """If an operator pasted a real secret into the wrong env var the + preview must NOT echo it in full โ€” only the leading 6 chars.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + result = plugin._redact_key_preview("sk-lf-abcdefghijklmnop") + assert "abcdefghij" not in result + assert result.startswith("'sk-lf-") + assert result.endswith("...'") + + def test_validate_langfuse_key_accepts_documented_prefix(self, monkeypatch): + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._validate_langfuse_key( + "HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz" + ) is None + assert plugin._validate_langfuse_key( + "HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz" + ) is None + + def test_validate_langfuse_key_rejects_wrong_prefix(self, monkeypatch): + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + msg = plugin._validate_langfuse_key( + "HERMES_LANGFUSE_PUBLIC_KEY", "placeholder" + ) + assert msg is not None + assert "HERMES_LANGFUSE_PUBLIC_KEY" in msg + assert "pk-lf-" in msg + + def test_validate_langfuse_key_unknown_name_passes(self, monkeypatch): + """Defensive: an env var with no registered prefix is trusted.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin() + assert plugin._validate_langfuse_key("HERMES_LANGFUSE_BASE_URL", "anything") is None + + # -- end-to-end _get_langfuse() behaviour -------------------------------- + # These tests pass `monkeypatch` to _fresh_plugin() so the helper can + # stub out `Langfuse` (the optional SDK). Without that, every call + # short-circuits at `if Langfuse is None` before reaching the + # placeholder validator โ€” masking the very behaviour we're testing. 
+ + def test_placeholder_public_key_warns_and_skips(self, monkeypatch, caplog): + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + text = caplog.text + assert "HERMES_LANGFUSE_PUBLIC_KEY" in text + assert "'placeholder'" in text + assert "pk-lf-" in text + # The valid secret value must NOT appear (the var NAME does, in + # the "or unset ..." hint, but the value preview shouldn't). + assert "'sk-lf-" not in text + # Never constructed the SDK client โ€” short-circuited before that. + assert _FakeLangfuse.instances == [] + + def test_placeholder_secret_key_warns_and_skips(self, monkeypatch, caplog): + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "test-key") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + text = caplog.text + assert "HERMES_LANGFUSE_SECRET_KEY" in text + assert "'test-key'" in text + assert "sk-lf-" in text + # The valid public value must NOT appear. 
+ assert "'pk-lf-" not in text + assert _FakeLangfuse.instances == [] + + def test_both_placeholders_one_warning_with_both_keys(self, monkeypatch, caplog): + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert len(warnings) == 1, ( + f"Expected a single combined warning; got {len(warnings)}:\n" + + "\n".join(r.getMessage() for r in warnings) + ) + text = warnings[0].getMessage() + assert "HERMES_LANGFUSE_PUBLIC_KEY" in text + assert "HERMES_LANGFUSE_SECRET_KEY" in text + + def test_repeated_calls_do_not_re_warn(self, monkeypatch, caplog): + """The cached ``_INIT_FAILED`` sentinel must short-circuit + subsequent calls so each hook invocation isn't a fresh log + line โ€” otherwise a busy gateway will spam the operator's + terminal.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + for _ in range(15): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert len(warnings) == 1, ( + f"Warning fired {len(warnings)} times across 15 calls; " + "expected 1 (cached via _INIT_FAILED)" + ) + + @pytest.mark.parametrize("placeholder", [ + "placeholder", + "test-key", + "your-langfuse-key", + "change-me", + "xxx", + "dummy-key-here", + "", + "REPLACE_ME", + ]) + def test_common_placeholders_detected(self, monkeypatch, caplog, placeholder): + """A grab-bag of values that real-world ``.env.example`` 
templates + use as stand-ins. Any of them in either key must trip the guard.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", placeholder) + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + assert "HERMES_LANGFUSE_PUBLIC_KEY" in caplog.text + + def test_legacy_LANGFUSE_PUBLIC_KEY_also_validated(self, monkeypatch, caplog): + """The plugin reads both the canonical HERMES_-prefixed env var and + the legacy bare ``LANGFUSE_PUBLIC_KEY``. The validator must run on + whichever value ``_get_langfuse()`` actually consumed.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + # Warning names the canonical user-facing env var (the bare + # LANGFUSE_PUBLIC_KEY is a backwards-compat alias for the + # HERMES_-prefixed one โ€” operators set the HERMES_-prefixed one). + assert "HERMES_LANGFUSE_PUBLIC_KEY" in caplog.text + assert "'placeholder'" in caplog.text + + def test_missing_credentials_still_skip_silently(self, monkeypatch, caplog): + """Missing-creds is the documented opt-out path (operator hasn't + configured the plugin yet) โ€” it must remain SILENT. 
Regression + guard against the placeholder validator accidentally running on + empty values and re-introducing log noise for unconfigured + installs.""" + self._clear_env(monkeypatch) + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert warnings == [] + + def test_sdk_not_installed_still_skips_silently(self, monkeypatch, caplog): + """If the langfuse SDK isn't installed at all, the placeholder + check should never run โ€” there's nothing the operator can do + about a credential mismatch when the package is missing, and + re-warning here would dilute the actually-actionable SDK-missing + signal upstream. The ``Langfuse is None`` guard at the top of + ``_get_langfuse`` already handles this; this test pins that + behaviour.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "placeholder") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "placeholder") + # NO monkeypatch on Langfuse here โ€” falls back to whatever the + # plugin imported at module load (None if SDK absent). + plugin = self._fresh_plugin() + monkeypatch.setattr(plugin, "Langfuse", None, raising=False) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + assert plugin._get_langfuse() is None + warnings = [r for r in caplog.records if r.levelname == "WARNING" + and r.name == self.LOGGER_NAME] + assert warnings == [] + + def test_valid_prefixes_do_not_trigger_placeholder_warning(self, monkeypatch, caplog): + """Real Langfuse keys (``pk-lf-โ€ฆ`` / ``sk-lf-โ€ฆ``) must pass the + guard and proceed to SDK init. 
We stub the SDK constructor with + a recording fake so the assertion can confirm BOTH that the + placeholder warning didn't fire AND that the client was actually + constructed โ€” the latter is the success signal the bug report + wanted.""" + self._clear_env(monkeypatch) + monkeypatch.setenv("HERMES_LANGFUSE_PUBLIC_KEY", "pk-lf-real-public-xyz") + monkeypatch.setenv("HERMES_LANGFUSE_SECRET_KEY", "sk-lf-real-secret-xyz") + plugin = self._fresh_plugin(monkeypatch) + with caplog.at_level(logging.WARNING, logger=self.LOGGER_NAME): + client = plugin._get_langfuse() + assert isinstance(client, _FakeLangfuse) + assert client.kwargs["public_key"] == "pk-lf-real-public-xyz" + assert client.kwargs["secret_key"] == "sk-lf-real-secret-xyz" + assert "placeholders" not in caplog.text.lower(), ( + f"Valid Langfuse keys tripped the placeholder guard: {caplog.text!r}" + ) + + +class TestRequestMessageCoercion: + def test_prefers_request_messages_then_messages_then_history_then_user_message(self): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + assert mod._coerce_request_messages( + request_messages=[{"role": "system", "content": "s"}], + messages=[{"role": "user", "content": "m"}], + conversation_history=[{"role": "user", "content": "h"}], + user_message="u", + ) == [{"role": "system", "content": "s"}] + assert mod._coerce_request_messages( + messages=[{"role": "user", "content": "m"}], + conversation_history=[{"role": "user", "content": "h"}], + user_message="u", + ) == [{"role": "user", "content": "m"}] + assert mod._coerce_request_messages( + conversation_history=[{"role": "user", "content": "h"}], + user_message="u", + ) == [{"role": "user", "content": "h"}] + assert mod._coerce_request_messages(user_message="u") == [{"role": "user", "content": "u"}] + + +class TestToolCallOutputBackfill: + def test_post_tool_call_backfills_matching_turn_tool_call_output(self, monkeypatch): + 
sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + observation = object() + state = mod.TraceState(trace_id="trace-1", root_ctx=None, root_span=None) + state.tools["call-1"] = observation + state.turn_tool_calls.append({ + "id": "call-1", + "type": "function", + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + "function": { + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + }, + }) + + task_key = mod._trace_key("task-1", "session-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + ended = {} + + def fake_end_observation(obs, *, output=None, metadata=None, usage_details=None, cost_details=None): + ended["observation"] = obs + ended["output"] = output + ended["metadata"] = metadata + + monkeypatch.setattr(mod, "_end_observation", fake_end_observation) + + mod.on_post_tool_call( + tool_name="web_extract", + args={"urls": ["https://example.com"]}, + result='{"results": [{"url": "https://example.com", "content": "Example Domain"}]}', + task_id="task-1", + session_id="session-1", + tool_call_id="call-1", + ) + + assert ended["observation"] is observation + assert state.turn_tool_calls[0]["output"] == ended["output"] + assert state.turn_tool_calls[0]["function"]["output"] == ended["output"] + assert state.turn_tool_calls[0]["output"] == { + "results": [{"url": "https://example.com", "content": "Example Domain"}] + } + + def test_serialize_messages_keeps_tool_name_and_call_id(self): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + messages = [{ + "role": "tool", + "name": "web_extract", + "tool_call_id": "call-1", + "content": '{"ok": true}', + }] + + assert mod._serialize_messages(messages) == [{ + "role": "tool", + "name": "web_extract", + "tool_call_id": "call-1", + "content": {"ok": True}, + }] + + def 
test_serialize_tool_calls_emits_openai_style_function_shape(self): + sys.modules.pop("plugins.observability.langfuse", None) + mod = importlib.import_module("plugins.observability.langfuse") + + class _Fn: + name = "web_extract" + arguments = '{"urls": ["https://example.com"]}' + + class _ToolCall: + id = "call-1" + type = "function" + function = _Fn() + + assert mod._serialize_tool_calls([_ToolCall()]) == [{ + "id": "call-1", + "type": "function", + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + "function": { + "name": "web_extract", + "arguments": '{"urls": ["https://example.com"]}', + }, + }] + + +class TestToolObservationKeying: + """Tests for pre/post tool_call observation matching when tool_call_id is absent.""" + + def _make_mod(self): + sys.modules.pop("plugins.observability.langfuse", None) + return importlib.import_module("plugins.observability.langfuse") + + def test_empty_tool_call_id_single_tool_sets_output(self, monkeypatch): + mod = self._make_mod() + obs = object() + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.pending_tools_by_name.setdefault("my_tool", []).append(obs) + + task_key = mod._trace_key("task-1", "sess-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + ended = {} + + def fake_end(o, *, output=None, metadata=None, **kw): + ended["obs"] = o + ended["output"] = output + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + mod.on_post_tool_call( + tool_name="my_tool", + args={}, + result='{"ok": true}', + task_id="task-1", + session_id="sess-1", + tool_call_id="", + ) + + assert ended["obs"] is obs + assert ended["output"] == {"ok": True} + assert state.pending_tools_by_name.get("my_tool") is None + + def test_empty_tool_call_id_observations_are_fifo_within_tool_name(self, monkeypatch): + """Two queued observations are consumed in FIFO order so the first + post hook gets the first observation's output, not the second. 
+ + Sequential-on-one-thread coverage; the real concurrent case is + guarded by ``_STATE_LOCK`` around every read-modify-write on + ``pending_tools_by_name`` and is exercised in + ``test_threaded_post_calls_preserve_fifo_under_lock`` below. + """ + mod = self._make_mod() + obs_a, obs_b = object(), object() + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.pending_tools_by_name["web_extract"] = [obs_a, obs_b] + + task_key = mod._trace_key("task-1", "sess-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + calls = [] + + def fake_end(o, *, output=None, metadata=None, **kw): + calls.append((o, output)) + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + mod.on_post_tool_call( + tool_name="web_extract", args={}, result='{"val": "a"}', + task_id="task-1", session_id="sess-1", tool_call_id="", + ) + mod.on_post_tool_call( + tool_name="web_extract", args={}, result='{"val": "b"}', + task_id="task-1", session_id="sess-1", tool_call_id="", + ) + + assert calls[0] == (obs_a, {"val": "a"}) + assert calls[1] == (obs_b, {"val": "b"}) + assert state.pending_tools_by_name.get("web_extract") is None + + def test_threaded_post_calls_preserve_fifo_under_lock(self, monkeypatch): + """The actual concurrency contract: when 8 threads race to drain + the pending queue, no observation is consumed twice and none is + lost. 
Validates ``_STATE_LOCK`` discipline, not Python list + semantics.""" + import threading + + mod = self._make_mod() + n = 8 + observations = [object() for _ in range(n)] + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.pending_tools_by_name["web_extract"] = list(observations) + + task_key = mod._trace_key("task-thr", "sess-thr") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + recorded: list = [] + lock = threading.Lock() + + def fake_end(o, *, output=None, metadata=None, **kw): + with lock: + recorded.append(o) + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + barrier = threading.Barrier(n) + + def worker(): + barrier.wait() + mod.on_post_tool_call( + tool_name="web_extract", args={}, result='{"ok": true}', + task_id="task-thr", session_id="sess-thr", tool_call_id="", + ) + + threads = [threading.Thread(target=worker) for _ in range(n)] + for t in threads: + t.start() + for t in threads: + t.join() + + # Every observation was consumed exactly once; queue is empty. 
+ assert len(recorded) == n + assert set(map(id, recorded)) == set(map(id, observations)) + assert state.pending_tools_by_name.get("web_extract") is None + + def test_explicit_tool_call_id_uses_tools_dict(self, monkeypatch): + """When tool_call_id is present, pending_tools_by_name is not touched.""" + mod = self._make_mod() + obs = object() + state = mod.TraceState(trace_id="t", root_ctx=None, root_span=None) + state.tools["call-99"] = obs + + task_key = mod._trace_key("task-1", "sess-1") + monkeypatch.setitem(mod._TRACE_STATE, task_key, state) + + ended = {} + + def fake_end(o, *, output=None, metadata=None, **kw): + ended["obs"] = o + ended["output"] = output + + monkeypatch.setattr(mod, "_end_observation", fake_end) + + mod.on_post_tool_call( + tool_name="my_tool", args={}, result='{"status": "done"}', + task_id="task-1", session_id="sess-1", tool_call_id="call-99", + ) + + assert ended["obs"] is obs + assert ended["output"] == {"status": "done"} + assert not state.tools + diff --git a/tests/providers/test_plugin_discovery.py b/tests/providers/test_plugin_discovery.py index 9ad6713e3ec8..a7cbb7d90303 100644 --- a/tests/providers/test_plugin_discovery.py +++ b/tests/providers/test_plugin_discovery.py @@ -46,14 +46,14 @@ def test_bundled_plugins_discovered(): assert (child / "plugin.yaml").exists(), f"{child.name} missing plugin.yaml" -def test_all_33_profiles_register(): - """After discovery, the registry must contain exactly 33 distinct profiles.""" +def test_all_34_profiles_register(): + """After discovery, the registry must contain exactly 34 distinct profiles.""" _clear_provider_caches() from providers import list_providers profiles = list_providers() names = sorted(p.name for p in profiles) - assert len(names) == 33, f"Expected 33 profiles, got {len(names)}: {names}" + assert len(names) == 34, f"Expected 34 profiles, got {len(names)}: {names}" # Spot-check representative providers from different categories for required in ( diff --git 
a/tests/run_agent/test_agent_loop.py b/tests/run_agent/test_agent_loop.py deleted file mode 100644 index bd9e41b91e2a..000000000000 --- a/tests/run_agent/test_agent_loop.py +++ /dev/null @@ -1,505 +0,0 @@ -""" -Tests for environments/agent_loop.py โ€” HermesAgentLoop. - -Tests the multi-turn agent engine using mocked servers, without needing -real API keys or running servers. -""" - -import asyncio -import json -import sys -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Optional -from unittest.mock import MagicMock - -import pytest - -# Ensure repo root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) - -try: - from environments.agent_loop import ( - AgentResult, - HermesAgentLoop, - ToolError, - _extract_reasoning_from_message, - resize_tool_pool, - ) -except ImportError: - pytest.skip("atroposlib not installed", allow_module_level=True) - - -# โ”€โ”€โ”€ Mock server infrastructure โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - - -@dataclass -class MockFunction: - name: str - arguments: str - - -@dataclass -class MockToolCall: - id: str - function: MockFunction - type: str = "function" - - -@dataclass -class MockMessage: - content: Optional[str] - role: str = "assistant" - tool_calls: Optional[List[MockToolCall]] = None - reasoning_content: Optional[str] = None - reasoning: Optional[str] = None - reasoning_details: Optional[list] = None - - -@dataclass -class MockChoice: - message: MockMessage - finish_reason: str = "stop" - index: int = 0 - - -@dataclass -class MockChatCompletion: - choices: List[MockChoice] - id: str = "chatcmpl-mock" - model: str = "mock-model" - - -class MockServer: - """ - Mock server that returns pre-configured responses in sequence. - Mimics the chat_completion() interface. 
- """ - - def __init__(self, responses: List[MockChatCompletion]): - self.responses = responses - self.call_count = 0 - self.call_history: List[Dict[str, Any]] = [] - - async def chat_completion(self, **kwargs) -> MockChatCompletion: - self.call_history.append(kwargs) - if self.call_count >= len(self.responses): - # Return a simple text response if we run out - return MockChatCompletion( - choices=[MockChoice(message=MockMessage(content="Done."))] - ) - resp = self.responses[self.call_count] - self.call_count += 1 - return resp - - -def make_text_response(content: str) -> MockChatCompletion: - """Create a simple text-only response (no tool calls).""" - return MockChatCompletion( - choices=[MockChoice(message=MockMessage(content=content))] - ) - - -def make_tool_response( - tool_name: str, - arguments: dict, - content: str = "", - tool_call_id: str = "call_001", -) -> MockChatCompletion: - """Create a response with a single tool call.""" - return MockChatCompletion( - choices=[ - MockChoice( - message=MockMessage( - content=content, - tool_calls=[ - MockToolCall( - id=tool_call_id, - function=MockFunction( - name=tool_name, - arguments=json.dumps(arguments), - ), - ) - ], - ), - finish_reason="tool_calls", - ) - ] - ) - - -# โ”€โ”€โ”€ Tests โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - - -class TestAgentResult: - def test_defaults(self): - result = AgentResult(messages=[]) - assert result.messages == [] - assert result.managed_state is None - assert result.turns_used == 0 - assert result.finished_naturally is False - assert result.reasoning_per_turn == [] - assert result.tool_errors == [] - - -class TestExtractReasoning: - def test_reasoning_content_field(self): - msg = MockMessage(content="hello", reasoning_content="I think...") - assert _extract_reasoning_from_message(msg) == "I think..." 
- - def test_reasoning_field(self): - msg = MockMessage(content="hello", reasoning="Let me consider...") - assert _extract_reasoning_from_message(msg) == "Let me consider..." - - def test_reasoning_details(self): - detail = MagicMock() - detail.text = "Detail reasoning" - msg = MockMessage(content="hello", reasoning_details=[detail]) - assert _extract_reasoning_from_message(msg) == "Detail reasoning" - - def test_reasoning_details_dict_format(self): - msg = MockMessage( - content="hello", - reasoning_details=[{"text": "Dict reasoning"}], - ) - assert _extract_reasoning_from_message(msg) == "Dict reasoning" - - def test_no_reasoning(self): - msg = MockMessage(content="hello") - assert _extract_reasoning_from_message(msg) is None - - def test_reasoning_content_takes_priority(self): - msg = MockMessage( - content="hello", - reasoning_content="First", - reasoning="Second", - ) - assert _extract_reasoning_from_message(msg) == "First" - - -class TestHermesAgentLoop: - """Test the agent loop with mock servers.""" - - @pytest.fixture - def basic_tools(self): - """Minimal tool schema for testing.""" - return [ - { - "type": "function", - "function": { - "name": "terminal", - "description": "Run a command", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "Command to run", - } - }, - "required": ["command"], - }, - }, - }, - { - "type": "function", - "function": { - "name": "read_file", - "description": "Read a file", - "parameters": { - "type": "object", - "properties": { - "path": {"type": "string"}, - }, - "required": ["path"], - }, - }, - }, - ] - - @pytest.fixture - def valid_names(self): - return {"terminal", "read_file", "todo"} - - @pytest.mark.asyncio - async def test_simple_text_response(self, basic_tools, valid_names): - """Model responds with text only, no tool calls.""" - server = MockServer([make_text_response("Hello! 
How can I help?")]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Hi"}] - result = await agent.run(messages) - - assert result.finished_naturally is True - assert result.turns_used == 1 - assert len(result.messages) >= 2 # user + assistant - assert result.messages[-1]["role"] == "assistant" - assert result.messages[-1]["content"] == "Hello! How can I help?" - - @pytest.mark.asyncio - async def test_tool_call_then_text(self, basic_tools, valid_names): - """Model calls a tool, then responds with text.""" - server = MockServer([ - make_tool_response("todo", {"todos": [{"id": "1", "content": "test", "status": "pending"}]}), - make_text_response("I created a todo for you."), - ]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Create a todo"}] - result = await agent.run(messages) - - assert result.finished_naturally is True - assert result.turns_used == 2 - # Should have: user, assistant (tool_call), tool (result), assistant (text) - roles = [m["role"] for m in result.messages] - assert roles == ["user", "assistant", "tool", "assistant"] - - @pytest.mark.asyncio - async def test_max_turns_reached(self, basic_tools, valid_names): - """Model keeps calling tools until max_turns is hit.""" - # Create responses that always call a tool - responses = [ - make_tool_response("todo", {"todos": [{"id": str(i), "content": f"task {i}", "status": "pending"}]}, tool_call_id=f"call_{i}") - for i in range(10) - ] - server = MockServer(responses) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=3, - ) - messages = [{"role": "user", "content": "Keep going"}] - result = await agent.run(messages) - - assert result.finished_naturally is False - assert result.turns_used == 3 - - 
@pytest.mark.asyncio - async def test_unknown_tool_name(self, basic_tools, valid_names): - """Model calls a tool not in valid_tool_names.""" - server = MockServer([ - make_tool_response("nonexistent_tool", {"arg": "val"}), - make_text_response("OK, that didn't work."), - ]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Call something weird"}] - result = await agent.run(messages) - - # Should record a tool error - assert len(result.tool_errors) >= 1 - assert result.tool_errors[0].tool_name == "nonexistent_tool" - - @pytest.mark.asyncio - async def test_empty_response(self, basic_tools, valid_names): - """Server returns empty response.""" - server = MockServer([MockChatCompletion(choices=[])]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Hi"}] - result = await agent.run(messages) - - assert result.finished_naturally is False - assert result.turns_used == 1 - - @pytest.mark.asyncio - async def test_api_error_handling(self, basic_tools, valid_names): - """Server raises an exception.""" - - class FailingServer: - async def chat_completion(self, **kwargs): - raise ConnectionError("Server unreachable") - - agent = HermesAgentLoop( - server=FailingServer(), - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Hi"}] - result = await agent.run(messages) - - assert result.finished_naturally is False - assert result.turns_used == 1 - - @pytest.mark.asyncio - async def test_tools_passed_to_server(self, basic_tools, valid_names): - """Verify tools are passed in the chat_completion kwargs.""" - server = MockServer([make_text_response("OK")]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - 
messages = [{"role": "user", "content": "Hi"}] - await agent.run(messages) - - assert len(server.call_history) == 1 - assert "tools" in server.call_history[0] - assert server.call_history[0]["tools"] == basic_tools - - @pytest.mark.asyncio - async def test_extra_body_forwarded(self, basic_tools, valid_names): - """extra_body should be forwarded to server.""" - extra = {"provider": {"ignore": ["DeepInfra"]}} - server = MockServer([make_text_response("OK")]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - extra_body=extra, - ) - messages = [{"role": "user", "content": "Hi"}] - await agent.run(messages) - - assert server.call_history[0].get("extra_body") == extra - - @pytest.mark.asyncio - async def test_managed_state_returned(self, basic_tools, valid_names): - """If server has get_state(), result should include managed_state.""" - server = MockServer([make_text_response("OK")]) - server.get_state = lambda: {"nodes": [{"test": True}]} - - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Hi"}] - result = await agent.run(messages) - - assert result.managed_state is not None - assert "nodes" in result.managed_state - - @pytest.mark.asyncio - async def test_no_managed_state_without_get_state(self, basic_tools, valid_names): - """Regular server without get_state() should return None managed_state.""" - server = MockServer([make_text_response("OK")]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "Hi"}] - result = await agent.run(messages) - - assert result.managed_state is None - - @pytest.mark.asyncio - async def test_memory_tool_blocked(self, basic_tools): - """Memory tool should return error in RL environments.""" - valid = {"terminal", "read_file", "todo", "memory"} 
- server = MockServer([ - make_tool_response("memory", {"action": "add", "target": "user", "content": "test"}), - make_text_response("Done"), - ]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid, - max_turns=10, - ) - messages = [{"role": "user", "content": "Remember this"}] - result = await agent.run(messages) - - # Find the tool response - tool_msgs = [m for m in result.messages if m["role"] == "tool"] - assert len(tool_msgs) >= 1 - tool_result = json.loads(tool_msgs[0]["content"]) - assert "error" in tool_result - assert "not available" in tool_result["error"].lower() - - @pytest.mark.asyncio - async def test_session_search_blocked(self, basic_tools): - """session_search should return error in RL environments.""" - valid = {"terminal", "read_file", "todo", "session_search"} - server = MockServer([ - make_tool_response("session_search", {"query": "test"}), - make_text_response("Done"), - ]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid, - max_turns=10, - ) - messages = [{"role": "user", "content": "Search sessions"}] - result = await agent.run(messages) - - tool_msgs = [m for m in result.messages if m["role"] == "tool"] - assert len(tool_msgs) >= 1 - tool_result = json.loads(tool_msgs[0]["content"]) - assert "error" in tool_result - - @pytest.mark.asyncio - async def test_reasoning_content_preserved(self, basic_tools, valid_names): - """Reasoning content should be extracted and preserved.""" - resp = MockChatCompletion( - choices=[ - MockChoice( - message=MockMessage( - content="The answer is 42.", - reasoning_content="Let me think about this step by step...", - ) - ) - ] - ) - server = MockServer([resp]) - agent = HermesAgentLoop( - server=server, - tool_schemas=basic_tools, - valid_tool_names=valid_names, - max_turns=10, - ) - messages = [{"role": "user", "content": "What is the meaning of life?"}] - result = await agent.run(messages) - - assert 
len(result.reasoning_per_turn) == 1 - assert result.reasoning_per_turn[0] == "Let me think about this step by step..." - - -class TestResizeToolPool: - def test_resize_works(self): - """resize_tool_pool should not raise.""" - resize_tool_pool(16) # Small pool for testing - resize_tool_pool(128) # Restore default - - def test_resize_shuts_down_previous_executor(self, monkeypatch): - """Replacing the global tool executor should shut down the old pool.""" - import environments.agent_loop as agent_loop_module - - old_executor = MagicMock() - new_executor = MagicMock() - - monkeypatch.setattr(agent_loop_module, "_tool_executor", old_executor) - monkeypatch.setattr( - agent_loop_module.concurrent.futures, - "ThreadPoolExecutor", - MagicMock(return_value=new_executor), - ) - - resize_tool_pool(16) - - old_executor.shutdown.assert_called_once_with(wait=False) - assert agent_loop_module._tool_executor is new_executor diff --git a/tests/run_agent/test_agent_loop_tool_calling.py b/tests/run_agent/test_agent_loop_tool_calling.py deleted file mode 100644 index 3b8d6ac5988c..000000000000 --- a/tests/run_agent/test_agent_loop_tool_calling.py +++ /dev/null @@ -1,552 +0,0 @@ -"""Integration tests for HermesAgentLoop tool calling. - -Tests the full agent loop with real LLM calls via OpenRouter. -Uses stepfun/step-3.5-flash:free by default (zero cost), falls back -to anthropic/claude-sonnet-4 if the free model is unavailable. - -These tests verify: -1. Single tool call: model calls a tool, gets result, responds -2. Multi-tool call: model calls multiple tools in one turn -3. Multi-turn: model calls tools across multiple turns -4. Unknown tool rejection: model calling a non-existent tool gets an error -5. Max turns: loop stops when max_turns is reached -6. No tools: model responds without calling any tools -7. 
Tool error handling: tool execution errors are captured - -Run: - pytest tests/test_agent_loop_tool_calling.py -v - pytest tests/test_agent_loop_tool_calling.py -v -k "single" # run one test -""" - -import asyncio -import json -import os -import sys -from pathlib import Path -from typing import Any, Dict, List, Set -from unittest.mock import patch - -import pytest - -# pytestmark removed โ€” tests skip gracefully via OPENROUTER_API_KEY check on line 59 - -# Ensure repo root is importable -_repo_root = Path(__file__).resolve().parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -try: - from environments.agent_loop import AgentResult, HermesAgentLoop - from atroposlib.envs.server_handling.openai_server import OpenAIServer # noqa: F401 -except ImportError: - pytest.skip("atroposlib not installed", allow_module_level=True) - - -# ========================================================================= -# Test infrastructure -# ========================================================================= - -# Models to try, in order of preference (free first) -_MODELS = [ - "stepfun/step-3.5-flash:free", - "google/gemini-2.0-flash-001", - "anthropic/claude-sonnet-4", -] - -def _get_api_key(): - key = os.getenv("OPENROUTER_API_KEY", "") - if not key: - pytest.skip("OPENROUTER_API_KEY not set") - return key - - -def _make_server(model: str = None): - """Create an OpenAI server for testing.""" - from atroposlib.envs.server_handling.openai_server import OpenAIServer - from atroposlib.envs.server_handling.server_manager import APIServerConfig - - config = APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name=model or _MODELS[0], - server_type="openai", - api_key=_get_api_key(), - health_check=False, - ) - return OpenAIServer(config) - - -async def _try_models(test_fn): - """Try running a test with each model until one works.""" - last_error = None - for model in _MODELS: - try: - server = _make_server(model) - 
return await test_fn(server, model) - except Exception as e: - last_error = e - if "rate" in str(e).lower() or "limit" in str(e).lower(): - continue # Rate limited, try next model - raise # Real error - pytest.skip(f"All models failed. Last error: {last_error}") - - -# ========================================================================= -# Fake tools for testing -# ========================================================================= - -# Simple calculator tool -CALC_TOOL = { - "type": "function", - "function": { - "name": "calculate", - "description": "Calculate a math expression. Returns the numeric result.", - "parameters": { - "type": "object", - "properties": { - "expression": { - "type": "string", - "description": "Math expression to evaluate, e.g. '2 + 3'" - } - }, - "required": ["expression"], - }, - }, -} - -# Weather lookup tool -WEATHER_TOOL = { - "type": "function", - "function": { - "name": "get_weather", - "description": "Get the current weather for a city. Returns temperature and conditions.", - "parameters": { - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "City name, e.g. 'Tokyo'" - } - }, - "required": ["city"], - }, - }, -} - -# Lookup tool (always succeeds) -LOOKUP_TOOL = { - "type": "function", - "function": { - "name": "lookup", - "description": "Look up a fact. 
Returns a short answer string.", - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "What to look up" - } - }, - "required": ["query"], - }, - }, -} - -# Error tool (always fails) -ERROR_TOOL = { - "type": "function", - "function": { - "name": "failing_tool", - "description": "A tool that always fails with an error.", - "parameters": { - "type": "object", - "properties": { - "input": {"type": "string"} - }, - "required": ["input"], - }, - }, -} - - -def _fake_tool_handler(tool_name: str, args: Dict[str, Any], **kwargs) -> str: - """Handle fake tool calls for testing.""" - if tool_name == "calculate": - expr = args.get("expression", "0") - try: - # Safe eval for simple math - result = eval(expr, {"__builtins__": {}}, {}) - return json.dumps({"result": result}) - except Exception as e: - return json.dumps({"error": str(e)}) - - elif tool_name == "get_weather": - city = args.get("city", "Unknown") - # Return canned weather - return json.dumps({ - "city": city, - "temperature": 22, - "conditions": "sunny", - "humidity": 45, - }) - - elif tool_name == "lookup": - query = args.get("query", "") - return json.dumps({"answer": f"The answer to '{query}' is 42."}) - - elif tool_name == "failing_tool": - raise RuntimeError("This tool always fails!") - - return json.dumps({"error": f"Unknown tool: {tool_name}"}) - - -# ========================================================================= -# Tests -# ========================================================================= - -@pytest.mark.asyncio -async def test_single_tool_call(): - """Model should call a single tool, get the result, and respond.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[WEATHER_TOOL], - valid_tool_names={"get_weather"}, - max_turns=5, - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": "What's the weather in Tokyo? 
Use the get_weather tool."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - assert isinstance(result, AgentResult) - assert result.turns_used >= 2, f"Expected at least 2 turns (tool call + response), got {result.turns_used}" - - # Verify a tool call happened - tool_calls_found = False - for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - if tc["function"]["name"] == "get_weather": - tool_calls_found = True - args = json.loads(tc["function"]["arguments"]) - assert "city" in args - assert tool_calls_found, "Model should have called get_weather" - - # Verify tool result is in conversation - tool_results = [m for m in result.messages if m.get("role") == "tool"] - assert len(tool_results) >= 1, "Should have at least one tool result" - - # Verify the final response references the weather - final_msg = result.messages[-1] - assert final_msg["role"] == "assistant" - assert final_msg["content"], "Final response should have content" - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_multi_tool_single_turn(): - """Model should call multiple tools in a single turn.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[WEATHER_TOOL, CALC_TOOL], - valid_tool_names={"get_weather", "calculate"}, - max_turns=5, - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": ( - "I need two things at once: " - "1) What's the weather in Paris? Use get_weather. " - "2) What is 15 * 7? Use calculate. " - "Call BOTH tools in a single response." 
- )}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Count distinct tools called - tools_called = set() - for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - tools_called.add(tc["function"]["name"]) - - # At minimum, both tools should have been called (maybe in different turns) - assert "get_weather" in tools_called, f"get_weather not called. Called: {tools_called}" - assert "calculate" in tools_called, f"calculate not called. Called: {tools_called}" - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_multi_turn_conversation(): - """Agent should handle multiple turns of tool calls.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[LOOKUP_TOOL, CALC_TOOL], - valid_tool_names={"lookup", "calculate"}, - max_turns=10, - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": ( - "First, use the lookup tool to look up 'meaning of life'. " - "Then use calculate to compute 6 * 7. " - "Do these in separate tool calls, one at a time." - )}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Should have used both tools - tools_called = set() - for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - tools_called.add(tc["function"]["name"]) - - assert "lookup" in tools_called, f"lookup not called. Called: {tools_called}" - assert "calculate" in tools_called, f"calculate not called. 
Called: {tools_called}" - - # Should finish naturally - assert result.finished_naturally, "Should finish naturally after answering" - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_unknown_tool_rejected(): - """If the model calls a tool not in valid_tool_names, it gets an error.""" - - async def _run(server, model): - # Only allow "calculate" but give schema for both - agent = HermesAgentLoop( - server=server, - tool_schemas=[CALC_TOOL, WEATHER_TOOL], - valid_tool_names={"calculate"}, # weather NOT allowed - max_turns=5, - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": "What's the weather in London? Use get_weather."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Check if get_weather was called and rejected - if result.tool_errors: - weather_errors = [e for e in result.tool_errors if e.tool_name == "get_weather"] - assert len(weather_errors) > 0, "get_weather should have been rejected" - assert "Unknown tool" in weather_errors[0].error - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_max_turns_limit(): - """Agent should stop after max_turns even if model keeps calling tools.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[LOOKUP_TOOL], - valid_tool_names={"lookup"}, - max_turns=2, # Very low limit - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": ( - "Keep looking up facts. Look up 'fact 1', then 'fact 2', " - "then 'fact 3', then 'fact 4'. Do them one at a time." 
- )}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - assert result.turns_used <= 2, f"Should stop at max_turns=2, used {result.turns_used}" - assert not result.finished_naturally, "Should NOT finish naturally (hit max_turns)" - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_no_tools_direct_response(): - """When no tools are useful, model should respond directly.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[WEATHER_TOOL], - valid_tool_names={"get_weather"}, - max_turns=5, - temperature=0.0, - max_tokens=200, - ) - - messages = [ - {"role": "user", "content": "What is 2 + 2? Just answer directly, no tools needed."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - assert result.finished_naturally, "Should finish naturally with a direct response" - assert result.turns_used == 1, f"Should take exactly 1 turn for a direct answer, took {result.turns_used}" - - final = result.messages[-1] - assert final["role"] == "assistant" - assert final["content"], "Should have text content" - assert "4" in final["content"], "Should contain the answer '4'" - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_tool_error_handling(): - """Tool execution errors should be captured and reported to the model.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[ERROR_TOOL], - valid_tool_names={"failing_tool"}, - max_turns=5, - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": "Please call the failing_tool with input 'test'."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # The tool error should be recorded - 
assert len(result.tool_errors) >= 1, "Should have at least one tool error" - assert "RuntimeError" in result.tool_errors[0].error or "always fails" in result.tool_errors[0].error - - # The error should be in the conversation as a tool result - tool_results = [m for m in result.messages if m.get("role") == "tool"] - assert len(tool_results) >= 1 - error_result = json.loads(tool_results[0]["content"]) - assert "error" in error_result - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_agent_result_structure(): - """Verify the AgentResult has all expected fields populated.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - tool_schemas=[CALC_TOOL], - valid_tool_names={"calculate"}, - max_turns=5, - temperature=0.0, - max_tokens=300, - ) - - messages = [ - {"role": "user", "content": "What is 3 + 4? Use the calculate tool."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Structural checks - assert isinstance(result, AgentResult) - assert isinstance(result.messages, list) - assert len(result.messages) >= 3, "Should have user + assistant(tool) + tool_result + assistant(final)" - assert isinstance(result.turns_used, int) - assert result.turns_used > 0 - assert isinstance(result.finished_naturally, bool) - assert isinstance(result.tool_errors, list) - assert isinstance(result.reasoning_per_turn, list) - - # Messages should follow OpenAI format - for msg in result.messages: - assert "role" in msg, f"Message missing 'role': {msg}" - assert msg["role"] in ("system", "user", "assistant", "tool"), f"Invalid role: {msg['role']}" - - return result - - await _try_models(_run) - - -@pytest.mark.asyncio -async def test_conversation_history_preserved(): - """The full conversation history should be in result.messages.""" - - async def _run(server, model): - agent = HermesAgentLoop( - server=server, - 
tool_schemas=[WEATHER_TOOL], - valid_tool_names={"get_weather"}, - max_turns=5, - temperature=0.0, - max_tokens=500, - ) - - messages = [ - {"role": "system", "content": "You are a helpful weather assistant."}, - {"role": "user", "content": "What's the weather in Berlin? Use get_weather."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # System message should be preserved - assert result.messages[0]["role"] == "system" - assert "weather assistant" in result.messages[0]["content"] - - # User message should be preserved - assert result.messages[1]["role"] == "user" - assert "Berlin" in result.messages[1]["content"] - - # Should have assistant + tool + assistant sequence - roles = [m["role"] for m in result.messages] - assert "tool" in roles, "Should have tool results in conversation" - - return result - - await _try_models(_run) diff --git a/tests/run_agent/test_agent_loop_vllm.py b/tests/run_agent/test_agent_loop_vllm.py deleted file mode 100644 index d42849094145..000000000000 --- a/tests/run_agent/test_agent_loop_vllm.py +++ /dev/null @@ -1,359 +0,0 @@ -"""Integration tests for HermesAgentLoop with a local vLLM server. - -Tests the full Phase 2 flow: ManagedServer + tool calling with a real -vLLM backend, producing actual token IDs and logprobs for RL training. - -Requires a running vLLM server. Start one from the atropos directory: - - python -m example_trainer.vllm_api_server \ - --model Qwen/Qwen3-4B-Thinking-2507 \ - --port 9001 \ - --gpu-memory-utilization 0.8 \ - --max-model-len=32000 - -Tests are automatically skipped if the server is not reachable. 
- -Run: - pytest tests/test_agent_loop_vllm.py -v - pytest tests/test_agent_loop_vllm.py -v -k "single" -""" - -import asyncio -import json -import os -import sys -from pathlib import Path -from typing import Any, Dict -from unittest.mock import patch - -import pytest -import requests - -# Ensure repo root is importable -_repo_root = Path(__file__).resolve().parent.parent.parent -if str(_repo_root) not in sys.path: - sys.path.insert(0, str(_repo_root)) - -try: - from environments.agent_loop import AgentResult, HermesAgentLoop -except ImportError: - pytest.skip("atroposlib not installed", allow_module_level=True) - - -# ========================================================================= -# Configuration -# ========================================================================= - -VLLM_HOST = "localhost" -VLLM_PORT = 9001 -VLLM_BASE_URL = f"http://{VLLM_HOST}:{VLLM_PORT}" -VLLM_MODEL = "Qwen/Qwen3-4B-Thinking-2507" - - -def _vllm_is_running() -> bool: - """Check if the vLLM server is reachable.""" - try: - r = requests.get(f"{VLLM_BASE_URL}/health", timeout=3) - return r.status_code == 200 - except Exception: - return False - - -# Skip all tests in this module if vLLM is not running -pytestmark = pytest.mark.skipif( - not _vllm_is_running(), - reason=( - f"vLLM server not reachable at {VLLM_BASE_URL}. 
" - "Start it with: python -m example_trainer.vllm_api_server " - f"--model {VLLM_MODEL} --port {VLLM_PORT} " - "--gpu-memory-utilization 0.8 --max-model-len=32000" - ), -) - - -# ========================================================================= -# Server setup -# ========================================================================= - -def _make_server_manager(): - """Create a ServerManager pointing to the local vLLM server.""" - from atroposlib.envs.server_handling.server_manager import ( - ServerManager, - APIServerConfig, - ) - - config = APIServerConfig( - base_url=VLLM_BASE_URL, - model_name=VLLM_MODEL, - server_type="vllm", - health_check=False, - ) - sm = ServerManager([config], tool_parser="hermes") - sm.servers[0].server_healthy = True - return sm - - -def _get_tokenizer(): - """Load the tokenizer for the model.""" - from transformers import AutoTokenizer - return AutoTokenizer.from_pretrained(VLLM_MODEL) - - -# ========================================================================= -# Fake tools -# ========================================================================= - -WEATHER_TOOL = { - "type": "function", - "function": { - "name": "get_weather", - "description": "Get the current weather for a city. Returns temperature and conditions.", - "parameters": { - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "City name, e.g. 'Tokyo'", - } - }, - "required": ["city"], - }, - }, -} - -CALC_TOOL = { - "type": "function", - "function": { - "name": "calculate", - "description": "Calculate a math expression. Returns the numeric result.", - "parameters": { - "type": "object", - "properties": { - "expression": { - "type": "string", - "description": "Math expression, e.g. 
'2 + 3'", - } - }, - "required": ["expression"], - }, - }, -} - - -def _fake_tool_handler(tool_name: str, args: Dict[str, Any], **kwargs) -> str: - """Handle fake tool calls for testing.""" - if tool_name == "get_weather": - city = args.get("city", "Unknown") - return json.dumps({ - "city": city, - "temperature": 22, - "conditions": "sunny", - "humidity": 45, - }) - elif tool_name == "calculate": - expr = args.get("expression", "0") - try: - result = eval(expr, {"__builtins__": {}}, {}) - return json.dumps({"result": result}) - except Exception as e: - return json.dumps({"error": str(e)}) - return json.dumps({"error": f"Unknown tool: {tool_name}"}) - - -# ========================================================================= -# Tests -# ========================================================================= - -@pytest.mark.asyncio -async def test_vllm_single_tool_call(): - """vLLM model calls a tool, gets result, responds โ€” full Phase 2 flow.""" - sm = _make_server_manager() - tokenizer = _get_tokenizer() - - async with sm.managed_server(tokenizer=tokenizer) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=[WEATHER_TOOL], - valid_tool_names={"get_weather"}, - max_turns=5, - temperature=0.6, - max_tokens=1000, - ) - - messages = [ - {"role": "user", "content": "What's the weather in Tokyo? 
Use the get_weather tool."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - assert isinstance(result, AgentResult) - assert result.turns_used >= 2, f"Expected at least 2 turns, got {result.turns_used}" - - # Verify tool call happened - tool_calls_found = False - for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - if tc["function"]["name"] == "get_weather": - tool_calls_found = True - args = json.loads(tc["function"]["arguments"]) - assert "city" in args - assert tool_calls_found, "Model should have called get_weather" - - # Verify tool results in conversation - tool_results = [m for m in result.messages if m.get("role") == "tool"] - assert len(tool_results) >= 1 - - -@pytest.mark.asyncio -async def test_vllm_multi_tool_calls(): - """vLLM model calls multiple tools across turns.""" - sm = _make_server_manager() - tokenizer = _get_tokenizer() - - async with sm.managed_server(tokenizer=tokenizer) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=[WEATHER_TOOL, CALC_TOOL], - valid_tool_names={"get_weather", "calculate"}, - max_turns=10, - temperature=0.6, - max_tokens=1000, - ) - - messages = [ - {"role": "user", "content": ( - "I need two things: " - "1) What's the weather in Paris? Use get_weather. " - "2) What is 15 * 7? Use calculate." - )}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Both tools should be called - tools_called = set() - for msg in result.messages: - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - tools_called.add(tc["function"]["name"]) - - assert "get_weather" in tools_called, f"get_weather not called. Called: {tools_called}" - assert "calculate" in tools_called, f"calculate not called. 
Called: {tools_called}" - - -@pytest.mark.asyncio -async def test_vllm_managed_server_produces_nodes(): - """ManagedServer should produce SequenceNodes with tokens and logprobs.""" - sm = _make_server_manager() - tokenizer = _get_tokenizer() - - async with sm.managed_server(tokenizer=tokenizer) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=[WEATHER_TOOL], - valid_tool_names={"get_weather"}, - max_turns=5, - temperature=0.6, - max_tokens=1000, - ) - - messages = [ - {"role": "user", "content": "What's the weather in Berlin? Use get_weather."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Get the managed state โ€” should have SequenceNodes - state = managed.get_state() - - assert state is not None, "ManagedServer should return state" - nodes = state.get("nodes", []) - assert len(nodes) >= 1, f"Should have at least 1 node, got {len(nodes)}" - - node = nodes[0] - assert hasattr(node, "tokens"), "Node should have tokens" - assert hasattr(node, "logprobs"), "Node should have logprobs" - assert len(node.tokens) > 0, "Tokens should not be empty" - assert len(node.logprobs) > 0, "Logprobs should not be empty" - assert len(node.tokens) == len(node.logprobs), ( - f"Tokens ({len(node.tokens)}) and logprobs ({len(node.logprobs)}) should have same length" - ) - - -@pytest.mark.asyncio -async def test_vllm_no_tools_direct_response(): - """vLLM model should respond directly when no tools are needed.""" - sm = _make_server_manager() - tokenizer = _get_tokenizer() - - async with sm.managed_server(tokenizer=tokenizer) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=[WEATHER_TOOL], - valid_tool_names={"get_weather"}, - max_turns=5, - temperature=0.6, - max_tokens=500, - ) - - messages = [ - {"role": "user", "content": "What is 2 + 2? 
Answer directly, no tools."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - assert result.finished_naturally, "Should finish naturally" - assert result.turns_used == 1, f"Should take 1 turn, took {result.turns_used}" - - final = result.messages[-1] - assert final["role"] == "assistant" - assert final["content"], "Should have content" - - -@pytest.mark.asyncio -async def test_vllm_thinking_content_extracted(): - """Qwen3-Thinking model should produce reasoning content.""" - sm = _make_server_manager() - tokenizer = _get_tokenizer() - - async with sm.managed_server( - tokenizer=tokenizer, - preserve_think_blocks=True, - ) as managed: - agent = HermesAgentLoop( - server=managed, - tool_schemas=[CALC_TOOL], - valid_tool_names={"calculate"}, - max_turns=5, - temperature=0.6, - max_tokens=1000, - ) - - messages = [ - {"role": "user", "content": "What is 123 * 456? Use the calculate tool."}, - ] - - with patch("environments.agent_loop.handle_function_call", side_effect=_fake_tool_handler): - result = await agent.run(messages) - - # Qwen3-Thinking should generate blocks - # Check if any content contains thinking markers - has_thinking = False - for msg in result.messages: - content = msg.get("content", "") or "" - if "" in content or "" in content: - has_thinking = True - break - - # Also check reasoning_per_turn - has_reasoning = any(r for r in result.reasoning_per_turn if r) - - # At least one of these should be true for a thinking model - assert has_thinking or has_reasoning, ( - "Qwen3-Thinking should produce blocks or reasoning content" - ) diff --git a/tests/run_agent/test_anthropic_truncation_continuation.py b/tests/run_agent/test_anthropic_truncation_continuation.py index b7a263f16495..872015bc0bc8 100644 --- a/tests/run_agent/test_anthropic_truncation_continuation.py +++ b/tests/run_agent/test_anthropic_truncation_continuation.py @@ -59,7 +59,7 @@ def 
test_text_only_truncation_produces_text_content_no_tool_calls(self): nr = get_transport("anthropic_messages").normalize_response(response) # The continuation block checks these two attributes: - # assistant_message.content โ†’ appended to truncated_response_prefix + # assistant_message.content โ†’ appended to truncated_response_parts # assistant_message.tool_calls โ†’ guards the text-retry branch assert nr.content is not None assert "partial response" in nr.content diff --git a/tests/run_agent/test_compression_feasibility.py b/tests/run_agent/test_compression_feasibility.py index f935821ada94..3e23f3eb5d3f 100644 --- a/tests/run_agent/test_compression_feasibility.py +++ b/tests/run_agent/test_compression_feasibility.py @@ -16,6 +16,16 @@ from agent.context_compressor import ContextCompressor +@pytest.fixture(autouse=True) +def _stable_aux_provider_config(): + """Keep feasibility tests independent from the developer's config.yaml.""" + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("auto", None, None, None, None), + ): + yield + + def _make_agent( *, compression_enabled: bool = True, @@ -41,6 +51,7 @@ def _make_agent( agent.tool_progress_callback = None agent._compression_warning = None agent._aux_compression_context_length_config = None + agent._custom_providers = [] agent.tools = [] compressor = MagicMock(spec=ContextCompressor) @@ -182,6 +193,7 @@ def test_feasibility_check_passes_config_context_length(mock_get_client, mock_ct api_key="sk-custom", config_context_length=1_000_000, provider="openrouter", + custom_providers=[], ) @@ -205,6 +217,7 @@ def test_feasibility_check_ignores_invalid_context_length(mock_get_client, mock_ api_key="sk-test", config_context_length=None, provider="openrouter", + custom_providers=[], ) @@ -258,6 +271,7 @@ def on_session_start(self, *args, **kwargs): api_key="sk-custom", config_context_length=1_000_000, provider="", + custom_providers=[], ) diff --git a/tests/run_agent/test_run_agent.py 
b/tests/run_agent/test_run_agent.py index dadb7b31ccee..c493f91509a7 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2524,8 +2524,9 @@ def _record_hook(name, **kwargs): assert [call["api_call_count"] for call in pre_request_calls] == [1, 2] assert [call["api_call_count"] for call in post_request_calls] == [1, 2] assert all(call["session_id"] == agent.session_id for call in pre_request_calls) - assert all("message_count" in c and "messages" not in c for c in pre_request_calls) - assert all("usage" in c and "response" not in c for c in post_request_calls) + assert all("message_count" in c and isinstance(c.get("request_messages"), list) for c in pre_request_calls) + assert any(msg.get("role") == "user" and msg.get("content") == "search something" for msg in pre_request_calls[0]["request_messages"]) + assert all("usage" in c and "response" in c and "assistant_message" in c for c in post_request_calls) def test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent): self._setup_agent(agent) diff --git a/tests/run_agent/test_streaming_tool_call_repair.py b/tests/run_agent/test_streaming_tool_call_repair.py index dadfaec33e5e..e85c0e22d184 100644 --- a/tests/run_agent/test_streaming_tool_call_repair.py +++ b/tests/run_agent/test_streaming_tool_call_repair.py @@ -23,7 +23,7 @@ class TestStreamingAssemblyRepair: These tests verify the REPAIR FUNCTION itself works correctly for the cases that arise during streaming assembly. Integration tests that - exercise the full streaming path are in test_agent_loop_tool_calling.py. + exercise the full streaming path are in run_agent.py's streaming tests. 
""" # -- Truncation cases (most common streaming failure) -- diff --git a/tests/scripts/test_release_acp_registry.py b/tests/scripts/test_release_acp_registry.py new file mode 100644 index 000000000000..4d20cda25bde --- /dev/null +++ b/tests/scripts/test_release_acp_registry.py @@ -0,0 +1,113 @@ +"""Tests for the ACP Registry version-lockstep bump in scripts/release.py. + +The official ACP Registry manifest must match ``pyproject.toml`` exactly โ€” +``tests/acp/test_registry_manifest.py`` enforces this at lint time, and the +upstream registry CI rejects ``@latest`` / floating pins. The release script +is the single place that bumps the manifest in lockstep with pyproject; if +that bump ever silently breaks, weekly releases fail the manifest test +until someone hand-edits the JSON. +""" + +from __future__ import annotations + +import importlib.util +import json +from pathlib import Path + + +def _load_release_module(monkeypatch, tmp_root: Path): + """Import scripts/release.py with REPO_ROOT pinned to a temp tree.""" + spec = importlib.util.spec_from_file_location( + "_release_under_test", + Path(__file__).resolve().parents[2] / "scripts" / "release.py", + ) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + monkeypatch.setattr(module, "REPO_ROOT", tmp_root) + monkeypatch.setattr( + module, "ACP_REGISTRY_MANIFEST", tmp_root / "acp_registry" / "agent.json" + ) + return module + + +def _write_manifest(root: Path, version: str) -> None: + manifest_dir = root / "acp_registry" + manifest_dir.mkdir(parents=True) + (manifest_dir / "agent.json").write_text( + json.dumps( + { + "id": "hermes-agent", + "name": "Hermes Agent", + "version": version, + "description": "test", + "distribution": { + "uvx": { + "package": f"hermes-agent[acp]=={version}", + "args": ["hermes-acp"], + } + }, + }, + indent=2, + ) + + "\n", + encoding="utf-8", + ) + + +def 
test_update_acp_registry_versions_bumps_manifest_and_pin(monkeypatch, tmp_path): + _write_manifest(tmp_path, "0.13.0") + module = _load_release_module(monkeypatch, tmp_path) + + module._update_acp_registry_versions("0.14.0") + + manifest = json.loads( + (tmp_path / "acp_registry" / "agent.json").read_text(encoding="utf-8") + ) + assert manifest["version"] == "0.14.0" + assert manifest["distribution"]["uvx"]["package"] == "hermes-agent[acp]==0.14.0" + # args stay untouched so we don't accidentally rewrite them. + assert manifest["distribution"]["uvx"]["args"] == ["hermes-acp"] + + +def test_update_acp_registry_versions_is_silent_when_manifest_missing( + monkeypatch, tmp_path +): + """Older release branches predate the ACP Registry asset โ€” must no-op.""" + module = _load_release_module(monkeypatch, tmp_path) + + # No fixture written; function should not raise. + module._update_acp_registry_versions("0.14.0") + + +def test_update_version_files_bumps_manifest_alongside_pyproject( + monkeypatch, tmp_path +): + """End-to-end: update_version_files() is the function release.py actually + calls, so it must drive the manifest bump too.""" + _write_manifest(tmp_path, "0.13.0") + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "hermes-agent"\nversion = "0.13.0"\n', encoding="utf-8" + ) + version_dir = tmp_path / "hermes_cli" + version_dir.mkdir() + (version_dir / "__init__.py").write_text( + '__version__ = "0.13.0"\n__release_date__ = "2026-05-14"\n', + encoding="utf-8", + ) + + module = _load_release_module(monkeypatch, tmp_path) + monkeypatch.setattr(module, "VERSION_FILE", version_dir / "__init__.py") + monkeypatch.setattr(module, "PYPROJECT_FILE", tmp_path / "pyproject.toml") + + module.update_version_files("0.14.0", "2026-05-21") + + pyproject_text = (tmp_path / "pyproject.toml").read_text(encoding="utf-8") + assert 'version = "0.14.0"' in pyproject_text + + manifest = json.loads( + (tmp_path / "acp_registry" / "agent.json").read_text(encoding="utf-8") + 
) + assert manifest["version"] == "0.14.0" + assert manifest["distribution"]["uvx"]["package"] == "hermes-agent[acp]==0.14.0" diff --git a/tests/test_model_tools.py b/tests/test_model_tools.py index 379aac2bbcfb..beae3daa65e1 100644 --- a/tests/test_model_tools.py +++ b/tests/test_model_tools.py @@ -278,7 +278,7 @@ def test_expected_legacy_names(self): expected = [ "web_tools", "terminal_tools", "vision_tools", "moa_tools", "image_tools", "skills_tools", "browser_tools", "cronjob_tools", - "rl_tools", "file_tools", "tts_tools", + "file_tools", "tts_tools", ] for name in expected: assert name in _LEGACY_TOOLSET_MAP, f"Missing legacy toolset: {name}" diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index 64a154bb9a78..0d5bad8e8754 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -4649,3 +4649,158 @@ def test_config_show_displays_nested_max_turns(monkeypatch): ) assert ["Max Turns", "120"] in agent_rows + + +def test_notification_poller_delivers_completion(monkeypatch): + """Poller picks up completion events and triggers agent turns.""" + from tools.process_registry import process_registry + + turns = [] + emitted = [] + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + turns.append(prompt) + return { + "final_response": "ok", + "messages": [{"role": "assistant", "content": "ok"}], + } + + class _ImmediateThread: + def __init__(self, target=None, daemon=None): + self._target = target + def start(self): + self._target() + + sess = _session(agent=_Agent()) + server._sessions["sid_poll"] = sess + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a)) + monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + + # Clear queue + while not 
process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + process_registry._completion_consumed.discard("proc_poller_test") + + stop = threading.Event() + + # Put event on queue, then immediately signal stop so the poller + # runs exactly one iteration. + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_poller_test", + "command": "echo hello", + "exit_code": 0, + "output": "hello", + }) + stop.set() + + try: + server._notification_poller_loop(stop, "sid_poll", sess) + + # Should have emitted a status.update with kind=process + status_calls = [a for a in emitted if a[0] == "status.update"] + assert len(status_calls) >= 1 + assert status_calls[0][2]["kind"] == "process" + + # Should have triggered an agent turn + assert len(turns) == 1 + assert "[IMPORTANT: Background process proc_poller_test completed" in turns[0] + finally: + server._sessions.pop("sid_poll", None) + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + +def test_notification_poller_skips_consumed(monkeypatch): + """Already-consumed completions are not dispatched by the poller.""" + from tools.process_registry import process_registry + + turns = [] + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + turns.append(prompt) + return {"final_response": "ok", "messages": []} + + class _ImmediateThread: + def __init__(self, target=None, daemon=None): + self._target = target + def start(self): + self._target() + + sess = _session(agent=_Agent()) + server._sessions["sid_skip"] = sess + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *a, **kw: None) + monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + + while not process_registry.completion_queue.empty(): + 
process_registry.completion_queue.get_nowait() + + process_registry._completion_consumed.add("proc_already_done") + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_already_done", + "command": "echo x", + "exit_code": 0, + "output": "x", + }) + + stop = threading.Event() + stop.set() + + try: + server._notification_poller_loop(stop, "sid_skip", sess) + assert len(turns) == 0 + finally: + server._sessions.pop("sid_skip", None) + process_registry._completion_consumed.discard("proc_already_done") + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + +def test_notification_poller_requeues_when_busy(monkeypatch): + """When the agent is busy, the poller requeues the event.""" + from tools.process_registry import process_registry + + emitted = [] + + sess = _session(running=True) # agent is busy + server._sessions["sid_busy"] = sess + monkeypatch.setattr(server, "_emit", lambda *a, **kw: emitted.append(a)) + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + process_registry._completion_consumed.discard("proc_busy_test") + + evt = { + "type": "completion", + "session_id": "proc_busy_test", + "command": "make build", + "exit_code": 0, + "output": "ok", + } + process_registry.completion_queue.put(evt) + + stop = threading.Event() + stop.set() + + try: + server._notification_poller_loop(stop, "sid_busy", sess) + + # Status update was emitted (user sees it) + status_calls = [a for a in emitted if a[0] == "status.update"] + assert len(status_calls) == 1 + + # Event was requeued (agent was busy, no turn triggered) + assert not process_registry.completion_queue.empty() + requeued = process_registry.completion_queue.get_nowait() + assert requeued["session_id"] == "proc_busy_test" + finally: + server._sessions.pop("sid_busy", None) + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() 
diff --git a/tests/tools/test_image_generation_env.py b/tests/tools/test_image_generation_env.py index fc4e65533465..56c9741617ff 100644 --- a/tests/tools/test_image_generation_env.py +++ b/tests/tools/test_image_generation_env.py @@ -37,3 +37,62 @@ def test_fal_key_empty_is_unset(monkeypatch): ) assert image_generation_tool.check_fal_api_key() is False + + +# --------------------------------------------------------------------------- +# Actionable setup message when no FAL backend is reachable. +# Regression for the silent-drop UX gap described in issue #2543. +# --------------------------------------------------------------------------- + + +def test_no_backend_message_mentions_fal_signup_and_plugins(monkeypatch): + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "managed_nous_tools_enabled", lambda: False + ) + + msg = image_generation_tool._build_no_backend_setup_message() + + assert "FAL_KEY" in msg + assert "https://fal.ai" in msg + # Plugin pointer so users on a stale image_gen.provider know where to look. 
+ assert "hermes tools" in msg or "hermes plugins" in msg + + +def test_no_backend_message_mentions_managed_gateway_when_enabled(monkeypatch): + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "managed_nous_tools_enabled", lambda: True + ) + + msg = image_generation_tool._build_no_backend_setup_message() + + assert "managed FAL gateway" in msg + assert "Nous account" in msg or "hermes setup" in msg + + +def test_image_generate_tool_returns_actionable_error_when_no_backend(monkeypatch): + """End-to-end: handler must surface the actionable message, not a bare string.""" + import json + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "fal_key_is_configured", lambda: False + ) + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + monkeypatch.setattr( + image_generation_tool, "managed_nous_tools_enabled", lambda: False + ) + + result = json.loads( + image_generation_tool.image_generate_tool(prompt="a cat") + ) + + assert result["success"] is False + assert "https://fal.ai" in result["error"] + assert "FAL_KEY" in result["error"] diff --git a/tests/tools/test_managed_server_tool_support.py b/tests/tools/test_managed_server_tool_support.py deleted file mode 100644 index 5b917f3da89a..000000000000 --- a/tests/tools/test_managed_server_tool_support.py +++ /dev/null @@ -1,178 +0,0 @@ -""" -Tests for ManagedServer / tool-parser integration. - -Validates that: -1. The installed atroposlib API still matches Hermes's expectations -2. Hermes's parser registry remains compatible with ManagedServer parsing -3. HermesAgentBaseEnv wires the selected parser into ServerManager correctly - -These tests verify the contract between hermes-agent's environments/ code -and atroposlib's ManagedServer. They detect API incompatibilities early. 
-""" - -import inspect -import sys -from pathlib import Path - -import pytest - -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -try: - import atroposlib # noqa: F401 -except ImportError: - pytest.skip("atroposlib not installed", allow_module_level=True) - - -class TestManagedServerAPI: - """Test that ManagedServer's API matches what hermes-agent expects.""" - - def test_managed_server_init_signature(self): - """ManagedServer should accept tool_call_parser parameter.""" - from atroposlib.envs.server_handling.managed_server import ManagedServer - - sig = inspect.signature(ManagedServer.__init__) - params = list(sig.parameters.keys()) - - # Core params that must exist - assert "self" in params - assert "server" in params - assert "tokenizer" in params - assert "track_tree" in params - - # tool_call_parser โ€” required for tool_call_support branch - # If this fails, atroposlib hasn't been updated to tool_call_support - has_tool_parser = "tool_call_parser" in params - if not has_tool_parser: - pytest.skip( - "ManagedServer does not have tool_call_parser param โ€” " - "baseline atroposlib (pre tool_call_support branch)" - ) - - def test_server_manager_managed_server_signature(self): - """ServerManager.managed_server() should accept tool_call_parser.""" - from atroposlib.envs.server_handling.server_manager import ServerManager - - sig = inspect.signature(ServerManager.managed_server) - params = list(sig.parameters.keys()) - - assert "self" in params - assert "tokenizer" in params - - has_tool_parser = "tool_call_parser" in params - if not has_tool_parser: - pytest.skip( - "ServerManager.managed_server() does not have tool_call_parser param โ€” " - "baseline atroposlib (pre tool_call_support branch)" - ) - - def test_managed_server_chat_template_kwargs(self): - """ManagedServer should have CHAT_TEMPLATE_KWARGS for forwarding tools/thinking.""" - from atroposlib.envs.server_handling.managed_server import ManagedServer - - if not hasattr(ManagedServer, 
"CHAT_TEMPLATE_KWARGS"): - pytest.skip( - "ManagedServer does not have CHAT_TEMPLATE_KWARGS โ€” " - "baseline atroposlib (pre tool_call_support branch)" - ) - - kwargs = ManagedServer.CHAT_TEMPLATE_KWARGS - assert "tools" in kwargs, "tools must be in CHAT_TEMPLATE_KWARGS" - - def test_no_get_logprobs_method(self): - """get_logprobs should be removed in tool_call_support branch.""" - from atroposlib.envs.server_handling.managed_server import ManagedServer - - # In baseline, get_logprobs exists. In tool_call_support, it's removed. - # We just note the state โ€” not a hard fail either way. - has_get_logprobs = hasattr(ManagedServer, "get_logprobs") - if has_get_logprobs: - pytest.skip( - "ManagedServer still has get_logprobs โ€” baseline atroposlib" - ) - - -class TestParserCompatibility: - """Test that hermes-agent's parsers match ManagedServer's expectations.""" - - def test_parser_parse_returns_correct_format(self): - """ - ManagedServer expects parser.parse(text) -> (content, tool_calls) - where tool_calls is a list of objects with .id, .function.name, .function.arguments - """ - from environments.tool_call_parsers import get_parser - - parser = get_parser("hermes") - text = '{"name": "terminal", "arguments": {"command": "ls"}}' - content, tool_calls = parser.parse(text) - - assert tool_calls is not None - assert len(tool_calls) == 1 - - tc = tool_calls[0] - # ManagedServer accesses these attrs directly - assert hasattr(tc, "id") - assert hasattr(tc, "function") - assert hasattr(tc.function, "name") - assert hasattr(tc.function, "arguments") - - def test_parser_no_tools_returns_none(self): - """ManagedServer checks `if parsed_tool_calls:` โ€” None should be falsy.""" - from environments.tool_call_parsers import get_parser - - parser = get_parser("hermes") - content, tool_calls = parser.parse("Just text, no tools") - assert tool_calls is None - - def test_parser_content_is_string_or_none(self): - """ManagedServer uses `parsed_content or ""` โ€” must be str or 
None.""" - from environments.tool_call_parsers import get_parser - - parser = get_parser("hermes") - - # With tool calls - text = '{"name": "terminal", "arguments": {"command": "ls"}}' - content, _ = parser.parse(text) - assert content is None or isinstance(content, str) - - # Without tool calls - content2, _ = parser.parse("Just text") - assert isinstance(content2, str) - - -class TestBaseEnvCompatibility: - """Test that hermes_base_env.py's tool-parser wiring matches the current API.""" - - def test_hermes_base_env_sets_server_manager_tool_parser(self): - """Hermes wires parser selection through ServerManager.tool_parser.""" - import ast - - base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py" - source = base_env_path.read_text() - tree = ast.parse(source) - - found_assignment = False - for node in ast.walk(tree): - if isinstance(node, ast.Assign): - for target in node.targets: - if isinstance(target, ast.Attribute) and target.attr == "tool_parser": - parent = target.value - if ( - isinstance(parent, ast.Attribute) - and parent.attr == "server" - and isinstance(parent.value, ast.Name) - and parent.value.id == "self" - ): - found_assignment = True - - assert found_assignment, ( - "hermes_base_env.py should set self.server.tool_parser from config.tool_call_parser" - ) - - def test_hermes_base_env_uses_config_tool_call_parser(self): - """Verify hermes_base_env uses the config field rather than a local parser instance.""" - base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py" - source = base_env_path.read_text() - - assert 'tool_call_parser: str = Field(' in source - assert 'self.server.tool_parser = config.tool_call_parser' in source diff --git a/tests/tools/test_process_registry.py b/tests/tools/test_process_registry.py index f438b637e286..46c29bb9d096 100644 --- a/tests/tools/test_process_registry.py +++ b/tests/tools/test_process_registry.py @@ -865,3 +865,138 @@ def 
test_unknown_action(self): from tools.process_registry import _handle_process result = json.loads(_handle_process({"action": "unknown_action"})) assert "error" in result + + +# ========================================================================= +# format_process_notification + drain_notifications (shared helpers) +# ========================================================================= + +from tools.process_registry import format_process_notification + + +def test_format_completion_event(): + evt = { + "type": "completion", + "session_id": "proc_abc", + "command": "sleep 5", + "exit_code": 0, + "output": "done", + } + result = format_process_notification(evt) + assert "[IMPORTANT: Background process proc_abc completed" in result + assert "exit code 0" in result + assert "Command: sleep 5" in result + assert "Output:\ndone]" in result + + +def test_format_watch_match_event(): + evt = { + "type": "watch_match", + "session_id": "proc_xyz", + "command": "tail -f log", + "pattern": "ERROR", + "output": "ERROR: disk full", + "suppressed": 0, + } + result = format_process_notification(evt) + assert 'watch pattern "ERROR"' in result + assert "Matched output:\nERROR: disk full" in result + + +def test_format_watch_match_with_suppressed(): + evt = { + "type": "watch_match", + "session_id": "proc_xyz", + "command": "tail -f log", + "pattern": "WARN", + "output": "WARN: low mem", + "suppressed": 3, + } + result = format_process_notification(evt) + assert "3 earlier matches were suppressed" in result + + +def test_format_watch_disabled_event(): + evt = { + "type": "watch_disabled", + "message": "Watch disabled for proc_xyz: too many matches", + } + result = format_process_notification(evt) + assert "[IMPORTANT: Watch disabled for proc_xyz" in result + + +def test_format_returns_none_for_empty_event(): + evt = {} + result = format_process_notification(evt) + assert result is not None + assert "unknown" in result + + +def 
test_drain_notifications_returns_pending_events(): + from tools.process_registry import process_registry + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_drain1", + "command": "echo hi", + "exit_code": 0, + "output": "hi", + }) + process_registry.completion_queue.put({ + "type": "watch_match", + "session_id": "proc_drain2", + "command": "tail -f x", + "pattern": "ERR", + "output": "ERR found", + "suppressed": 0, + }) + + try: + results = process_registry.drain_notifications() + assert len(results) == 2 + assert results[0][0]["session_id"] == "proc_drain1" + assert "proc_drain1 completed" in results[0][1] + assert results[1][0]["session_id"] == "proc_drain2" + assert "watch pattern" in results[1][1] + finally: + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + process_registry._completion_consumed.discard("proc_drain1") + process_registry._completion_consumed.discard("proc_drain2") + + +def test_drain_notifications_skips_consumed(): + from tools.process_registry import process_registry + + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + process_registry._completion_consumed.add("proc_consumed") + process_registry.completion_queue.put({ + "type": "completion", + "session_id": "proc_consumed", + "command": "echo done", + "exit_code": 0, + "output": "done", + }) + + try: + results = process_registry.drain_notifications() + assert len(results) == 0 + finally: + process_registry._completion_consumed.discard("proc_consumed") + while not process_registry.completion_queue.empty(): + process_registry.completion_queue.get_nowait() + + +def test_drain_notifications_empty_queue(): + from tools.process_registry import process_registry + + while not process_registry.completion_queue.empty(): + 
process_registry.completion_queue.get_nowait() + + results = process_registry.drain_notifications() + assert results == [] diff --git a/tests/tools/test_rl_training_tool.py b/tests/tools/test_rl_training_tool.py deleted file mode 100644 index 8b68ea8d9464..000000000000 --- a/tests/tools/test_rl_training_tool.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Tests for rl_training_tool.py โ€” file handle lifecycle and cleanup. - -Verifies that _stop_training_run properly closes log file handles, -terminates processes, and handles edge cases on failure paths. -Inspired by PR #715 (0xbyt4). -""" - -from unittest.mock import MagicMock - -import pytest - -from tools.rl_training_tool import RunState, _stop_training_run - - -def _make_run_state(**overrides) -> RunState: - """Create a minimal RunState for testing.""" - defaults = { - "run_id": "test-run-001", - "environment": "test_env", - "config": {}, - } - defaults.update(overrides) - return RunState(**defaults) - - -class TestStopTrainingRunFileHandles: - """Verify that _stop_training_run closes log file handles stored as attributes.""" - - def test_closes_all_log_file_handles(self): - state = _make_run_state() - files = {} - for attr in ("api_log_file", "trainer_log_file", "env_log_file"): - fh = MagicMock() - setattr(state, attr, fh) - files[attr] = fh - - _stop_training_run(state) - - for attr, fh in files.items(): - fh.close.assert_called_once() - assert getattr(state, attr) is None - - def test_clears_file_attrs_to_none(self): - state = _make_run_state() - state.api_log_file = MagicMock() - - _stop_training_run(state) - - assert state.api_log_file is None - - def test_close_exception_does_not_propagate(self): - """If a file handle .close() raises, it must not crash.""" - state = _make_run_state() - bad_fh = MagicMock() - bad_fh.close.side_effect = OSError("already closed") - good_fh = MagicMock() - state.api_log_file = bad_fh - state.trainer_log_file = good_fh - - _stop_training_run(state) # should not raise - - 
bad_fh.close.assert_called_once() - good_fh.close.assert_called_once() - - def test_handles_missing_file_attrs(self): - """RunState without log file attrs should not crash.""" - state = _make_run_state() - # No log file attrs set at all โ€” getattr(..., None) should handle it - _stop_training_run(state) # should not raise - - -class TestStopTrainingRunProcesses: - """Verify that _stop_training_run terminates processes correctly.""" - - def test_terminates_running_processes(self): - state = _make_run_state() - for attr in ("api_process", "trainer_process", "env_process"): - proc = MagicMock() - proc.poll.return_value = None # still running - setattr(state, attr, proc) - - _stop_training_run(state) - - for attr in ("api_process", "trainer_process", "env_process"): - getattr(state, attr).terminate.assert_called_once() - - def test_does_not_terminate_exited_processes(self): - state = _make_run_state() - proc = MagicMock() - proc.poll.return_value = 0 # already exited - state.api_process = proc - - _stop_training_run(state) - - proc.terminate.assert_not_called() - - def test_handles_none_processes(self): - state = _make_run_state() - # All process attrs are None by default - _stop_training_run(state) # should not raise - - def test_handles_mixed_running_and_exited_processes(self): - state = _make_run_state() - # api still running - api = MagicMock() - api.poll.return_value = None - state.api_process = api - # trainer already exited - trainer = MagicMock() - trainer.poll.return_value = 0 - state.trainer_process = trainer - # env is None - state.env_process = None - - _stop_training_run(state) - - api.terminate.assert_called_once() - trainer.terminate.assert_not_called() - - -class TestStopTrainingRunStatus: - """Verify status transitions in _stop_training_run.""" - - def test_sets_status_to_stopped_when_running(self): - state = _make_run_state(status="running") - _stop_training_run(state) - assert state.status == "stopped" - - def 
test_does_not_change_status_when_failed(self): - state = _make_run_state(status="failed") - _stop_training_run(state) - assert state.status == "failed" - - def test_does_not_change_status_when_pending(self): - state = _make_run_state(status="pending") - _stop_training_run(state) - assert state.status == "pending" - - def test_no_crash_with_no_processes_and_no_files(self): - state = _make_run_state() - _stop_training_run(state) # should not raise - assert state.status == "pending" diff --git a/tests/tools/test_send_message_tool.py b/tests/tools/test_send_message_tool.py index fa810eb5c54d..3d1313b0ec0b 100644 --- a/tests/tools/test_send_message_tool.py +++ b/tests/tools/test_send_message_tool.py @@ -844,6 +844,34 @@ def test_discord_whitespace_is_stripped(self): assert is_explicit is True +class TestParseTargetRefTlon: + """_parse_target_ref handles Tlon ships, channels, and thread replies.""" + + def test_tlon_ship_dm_is_explicit(self): + chat_id, thread_id, is_explicit = _parse_target_ref("tlon", "~zod") + assert chat_id == "~zod" + assert thread_id is None + assert is_explicit is True + + def test_tlon_channel_is_explicit(self): + chat_id, thread_id, is_explicit = _parse_target_ref("tlon", "chat/~zod/general") + assert chat_id == "chat/~zod/general" + assert thread_id is None + assert is_explicit is True + + def test_tlon_channel_thread_is_explicit(self): + chat_id, thread_id, is_explicit = _parse_target_ref( + "tlon", + "chat/~zod/general:170.141.184", + ) + assert chat_id == "chat/~zod/general" + assert thread_id == "170.141.184" + assert is_explicit is True + + def test_tlon_invalid_target_requires_directory_resolution(self): + assert _parse_target_ref("tlon", "general")[2] is False + + class TestParseTargetRefMatrix: """_parse_target_ref correctly handles Matrix room IDs and user MXIDs.""" diff --git a/tests/tools/test_tool_call_parsers.py b/tests/tools/test_tool_call_parsers.py deleted file mode 100644 index bdea75698a89..000000000000 --- 
a/tests/tools/test_tool_call_parsers.py +++ /dev/null @@ -1,274 +0,0 @@ -""" -Tests for environments/tool_call_parsers/ โ€” client-side tool call parsers. - -These parsers extract structured tool_calls from raw model output text. -Used in Phase 2 (VLLM/generate) where the server returns raw tokens. -""" - -import json -import sys -from pathlib import Path - -import pytest - -# Ensure repo root is importable -sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) - -try: - from environments.tool_call_parsers import ( - ParseResult, - ToolCallParser, - get_parser, - list_parsers, - ) -except ImportError: - pytest.skip("atroposlib not installed", allow_module_level=True) - - -# โ”€โ”€โ”€ Registry tests โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - -class TestParserRegistry: - def test_list_parsers_returns_nonempty(self): - parsers = list_parsers() - assert len(parsers) > 0 - - def test_hermes_parser_registered(self): - parsers = list_parsers() - assert "hermes" in parsers - - def test_get_parser_returns_instance(self): - parser = get_parser("hermes") - assert isinstance(parser, ToolCallParser) - - def test_get_parser_unknown_raises(self): - with pytest.raises(KeyError): - get_parser("nonexistent_parser_xyz") - - def test_all_registered_parsers_instantiate(self): - """Every registered parser should be importable and instantiable.""" - for name in list_parsers(): - parser = get_parser(name) - assert isinstance(parser, ToolCallParser) - assert hasattr(parser, "parse") - - -# โ”€โ”€โ”€ Hermes parser tests โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - -class TestHermesParser: - @pytest.fixture - def parser(self): - return get_parser("hermes") - - def test_no_tool_call(self, parser): - text = "Hello, I can help you with that." 
- content, tool_calls = parser.parse(text) - assert content == text - assert tool_calls is None - - def test_single_tool_call(self, parser): - text = '{"name": "terminal", "arguments": {"command": "ls -la"}}' - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "terminal" - args = json.loads(tool_calls[0].function.arguments) - assert args["command"] == "ls -la" - - def test_tool_call_with_surrounding_text(self, parser): - text = 'Let me check that for you.\n{"name": "terminal", "arguments": {"command": "pwd"}}' - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "terminal" - # Content should have the surrounding text - if content is not None: - assert "check that" in content or content.strip() != "" - - def test_multiple_tool_calls(self, parser): - text = ( - '{"name": "terminal", "arguments": {"command": "ls"}}\n' - '{"name": "read_file", "arguments": {"path": "test.py"}}' - ) - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 2 - names = {tc.function.name for tc in tool_calls} - assert "terminal" in names - assert "read_file" in names - - def test_tool_call_ids_are_unique(self, parser): - text = ( - '{"name": "terminal", "arguments": {"command": "ls"}}\n' - '{"name": "terminal", "arguments": {"command": "pwd"}}' - ) - _, tool_calls = parser.parse(text) - assert tool_calls is not None - ids = [tc.id for tc in tool_calls] - assert len(ids) == len(set(ids)), "Tool call IDs must be unique" - - def test_empty_string(self, parser): - content, tool_calls = parser.parse("") - assert tool_calls is None - - def test_malformed_json_in_tool_call(self, parser): - text = 'not valid json' - content, tool_calls = parser.parse(text) - # Should either return None tool_calls or handle gracefully - # (implementation may vary โ€” some parsers return 
error tool calls) - - def test_truncated_tool_call(self, parser): - """Test handling of unclosed tool_call tag (model truncated mid-generation).""" - text = '{"name": "terminal", "arguments": {"command": "ls -la"}' - content, tool_calls = parser.parse(text) - # Parser should handle truncated output gracefully - # Either parse it successfully or return None - - -# โ”€โ”€โ”€ Parse result contract tests (applies to ALL parsers) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - -class TestParseResultContract: - """Ensure all parsers conform to the ParseResult contract.""" - - @pytest.fixture(params=["hermes"]) # Add more as needed - def parser(self, request): - return get_parser(request.param) - - def test_returns_tuple_of_two(self, parser): - result = parser.parse("hello world") - assert isinstance(result, tuple) - assert len(result) == 2 - - def test_no_tools_returns_none_tool_calls(self, parser): - content, tool_calls = parser.parse("Just plain text, no tools.") - assert tool_calls is None - assert content is not None - - def test_tool_calls_are_proper_objects(self, parser): - """When tool calls are found, they should be ChatCompletionMessageToolCall objects.""" - # Use hermes format since that's universal - text = '{"name": "terminal", "arguments": {"command": "echo hi"}}' - content, tool_calls = parser.parse(text) - if tool_calls is not None: - for tc in tool_calls: - assert hasattr(tc, "id") - assert hasattr(tc, "function") - assert hasattr(tc.function, "name") - assert hasattr(tc.function, "arguments") - assert tc.id is not None - assert isinstance(tc.function.name, str) - assert isinstance(tc.function.arguments, str) - - -# โ”€โ”€โ”€ DeepSeek V3 parser tests โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - -class TestDeepSeekV3Parser: - @pytest.fixture - def parser(self): - return get_parser("deepseek_v3") - - def test_no_tool_call(self, parser): - text = "Hello, how can I help 
you?" - content, tool_calls = parser.parse(text) - assert content == text - assert tool_calls is None - - def test_single_tool_call(self, parser): - text = ( - '<๏ฝœtoolโ–callsโ–begin๏ฝœ><๏ฝœtoolโ–callโ–begin๏ฝœ>function<๏ฝœtoolโ–sep๏ฝœ>get_weather\n' - '```json\n{"city": "London"}\n```<๏ฝœtoolโ–callโ–end๏ฝœ><๏ฝœtoolโ–callsโ–end๏ฝœ>' - ) - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "get_weather" - args = json.loads(tool_calls[0].function.arguments) - assert args["city"] == "London" - - def test_multiple_tool_calls(self, parser): - text = ( - '<๏ฝœtoolโ–callsโ–begin๏ฝœ>' - '<๏ฝœtoolโ–callโ–begin๏ฝœ>function<๏ฝœtoolโ–sep๏ฝœ>get_weather\n' - '```json\n{"city": "London"}\n```<๏ฝœtoolโ–callโ–end๏ฝœ>' - '<๏ฝœtoolโ–callโ–begin๏ฝœ>function<๏ฝœtoolโ–sep๏ฝœ>get_time\n' - '```json\n{"timezone": "UTC"}\n```<๏ฝœtoolโ–callโ–end๏ฝœ>' - '<๏ฝœtoolโ–callsโ–end๏ฝœ>' - ) - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 2, f"Expected 2 tool calls, got {len(tool_calls)}" - names = [tc.function.name for tc in tool_calls] - assert "get_weather" in names - assert "get_time" in names - - def test_tool_call_with_preceding_text(self, parser): - text = ( - 'Let me check that for you.\n' - '<๏ฝœtoolโ–callsโ–begin๏ฝœ><๏ฝœtoolโ–callโ–begin๏ฝœ>function<๏ฝœtoolโ–sep๏ฝœ>terminal\n' - '```json\n{"command": "ls"}\n```<๏ฝœtoolโ–callโ–end๏ฝœ><๏ฝœtoolโ–callsโ–end๏ฝœ>' - ) - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - - -# โ”€โ”€โ”€ Mistral parser tests โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ - -class TestMistralParser: - @pytest.fixture - def parser(self): - return get_parser("mistral") - - def test_no_tool_call(self, parser): - text = "Hello, how can I help 
you?" - content, tool_calls = parser.parse(text) - assert content == text - assert tool_calls is None - - def test_pre_v11_single_tool_call(self, parser): - text = '[TOOL_CALLS] [{"name": "func", "arguments": {"key": "val"}}]' - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "func" - args = json.loads(tool_calls[0].function.arguments) - assert args["key"] == "val" - - def test_pre_v11_nested_json(self, parser): - text = '[TOOL_CALLS] [{"name": "func", "arguments": {"nested": {"deep": true}}}]' - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "func" - args = json.loads(tool_calls[0].function.arguments) - assert args["nested"]["deep"] is True - - def test_v11_single_tool_call(self, parser): - text = '[TOOL_CALLS]get_weather{"city": "London"}' - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "get_weather" - args = json.loads(tool_calls[0].function.arguments) - assert args["city"] == "London" - - def test_v11_multiple_tool_calls(self, parser): - text = '[TOOL_CALLS]func1{"a": 1}[TOOL_CALLS]func2{"b": 2}' - content, tool_calls = parser.parse(text) - assert tool_calls is not None - assert len(tool_calls) == 2 - names = [tc.function.name for tc in tool_calls] - assert "func1" in names - assert "func2" in names - - def test_preceding_text_preserved(self, parser): - text = 'Hello[TOOL_CALLS]func{"a": 1}' - content, tool_calls = parser.parse(text) - assert content == "Hello" - assert tool_calls is not None - assert len(tool_calls) == 1 - assert tool_calls[0].function.name == "func" - - def test_malformed_json_fallback(self, parser): - text = "[TOOL_CALLS] not valid json" - content, tool_calls = parser.parse(text) - assert tool_calls is None diff --git a/tests/tools/test_url_safety.py 
b/tests/tools/test_url_safety.py index 38d27d40af3c..5a0cceb2880e 100644 --- a/tests/tools/test_url_safety.py +++ b/tests/tools/test_url_safety.py @@ -22,6 +22,14 @@ def test_public_url_allowed(self): ]): assert is_safe_url("https://example.com/image.png") is True + def test_ftp_scheme_blocked(self): + """Only http/https should be allowed for fetch tools.""" + assert is_safe_url("ftp://example.com/file.txt") is False + + def test_missing_scheme_blocked(self): + """Bare host/path should be rejected to avoid ambiguous handling.""" + assert is_safe_url("example.com/path") is False + def test_localhost_blocked(self): with patch("socket.getaddrinfo", return_value=[ (2, 1, 6, "", ("127.0.0.1", 0)), diff --git a/tinker-atropos b/tinker-atropos deleted file mode 160000 index 65f084ee8054..000000000000 --- a/tinker-atropos +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 65f084ee8054a5d02aeac76e24ed60388511c82b diff --git a/tools/browser_providers/browser_use.py b/tools/browser_providers/browser_use.py index 260249ef0bb7..a1f4f425ba02 100644 --- a/tools/browser_providers/browser_use.py +++ b/tools/browser_providers/browser_use.py @@ -137,12 +137,22 @@ def create_session(self, task_id: str) -> Dict[str, object]: else {} ) - response = requests.post( - f"{config['base_url']}/browsers", - headers=headers, - json=payload, - timeout=30, - ) + try: + response = requests.post( + f"{config['base_url']}/browsers", + headers=headers, + json=payload, + timeout=30, + ) + except requests.RequestException as exc: + # Managed mode: propagate raw so callers can retry with the + # preserved idempotency key. Direct mode: wrap network failures + # into a clean RuntimeError for end users. 
+ if managed_mode: + raise + raise RuntimeError( + f"Browser Use API connection failed: {exc}" + ) from exc if not response.ok: if managed_mode and not _should_preserve_pending_create_key(response): diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py index 5076af4c7a6f..4807345214b0 100644 --- a/tools/browser_providers/browserbase.py +++ b/tools/browser_providers/browserbase.py @@ -92,45 +92,50 @@ def create_session(self, task_id: str) -> Dict[str, object]: "X-BB-API-Key": config["api_key"], } - response = requests.post( - f"{config['base_url']}/v1/sessions", - headers=headers, - json=session_config, - timeout=30, - ) - - proxies_fallback = False - keepalive_fallback = False - - # Handle 402 โ€” paid features unavailable - if response.status_code == 402: - if enable_keep_alive: - keepalive_fallback = True - logger.warning( - "keepAlive may require paid plan (402), retrying without it. " - "Sessions may timeout during long operations." - ) - session_config.pop("keepAlive", None) - response = requests.post( - f"{config['base_url']}/v1/sessions", - headers=headers, - json=session_config, - timeout=30, - ) + try: + response = requests.post( + f"{config['base_url']}/v1/sessions", + headers=headers, + json=session_config, + timeout=30, + ) - if response.status_code == 402 and enable_proxies: - proxies_fallback = True - logger.warning( - "Proxies unavailable (402), retrying without proxies. " - "Bot detection may be less effective." - ) - session_config.pop("proxies", None) - response = requests.post( - f"{config['base_url']}/v1/sessions", - headers=headers, - json=session_config, - timeout=30, - ) + proxies_fallback = False + keepalive_fallback = False + + # Handle 402 โ€” paid features unavailable + if response.status_code == 402: + if enable_keep_alive: + keepalive_fallback = True + logger.warning( + "keepAlive may require paid plan (402), retrying without it. " + "Sessions may timeout during long operations." 
+ ) + session_config.pop("keepAlive", None) + response = requests.post( + f"{config['base_url']}/v1/sessions", + headers=headers, + json=session_config, + timeout=30, + ) + + if response.status_code == 402 and enable_proxies: + proxies_fallback = True + logger.warning( + "Proxies unavailable (402), retrying without proxies. " + "Bot detection may be less effective." + ) + session_config.pop("proxies", None) + response = requests.post( + f"{config['base_url']}/v1/sessions", + headers=headers, + json=session_config, + timeout=30, + ) + except requests.RequestException as exc: + raise RuntimeError( + f"Browserbase API connection failed: {exc}" + ) from exc if not response.ok: raise RuntimeError( diff --git a/tools/browser_providers/firecrawl.py b/tools/browser_providers/firecrawl.py index 17001f72f1dc..4a8ae82a2d24 100644 --- a/tools/browser_providers/firecrawl.py +++ b/tools/browser_providers/firecrawl.py @@ -47,12 +47,17 @@ def create_session(self, task_id: str) -> Dict[str, object]: body: Dict[str, object] = {"ttl": ttl} - response = requests.post( - f"{self._api_url()}/v2/browser", - headers=self._headers(), - json=body, - timeout=30, - ) + try: + response = requests.post( + f"{self._api_url()}/v2/browser", + headers=self._headers(), + json=body, + timeout=30, + ) + except requests.RequestException as exc: + raise RuntimeError( + f"Firecrawl API connection failed: {exc}" + ) from exc if not response.ok: raise RuntimeError( diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 79a6c7e61721..575beba6c026 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1873,7 +1873,13 @@ def _run_browser_command( # - Ubuntu 23.10+ / AppArmor systems: unprivileged user namespaces # are restricted, causing Chromium to exit with "No usable sandbox" # even for non-root users running under systemd or containers. 
- if "AGENT_BROWSER_CHROME_FLAGS" not in browser_env: + # Honour either the legacy AGENT_BROWSER_CHROME_FLAGS (never consumed by + # agent-browser itself, but documented in older notes) or the real + # AGENT_BROWSER_ARGS โ€” if the user pre-sets either, don't overwrite it. + if ( + "AGENT_BROWSER_ARGS" not in browser_env + and "AGENT_BROWSER_CHROME_FLAGS" not in browser_env + ): _needs_sandbox_bypass = False if hasattr(os, "geteuid") and os.geteuid() == 0: _needs_sandbox_bypass = True @@ -1892,8 +1898,8 @@ def _run_browser_command( except OSError: pass if _needs_sandbox_bypass: - browser_env["AGENT_BROWSER_CHROME_FLAGS"] = ( - "--no-sandbox --disable-dev-shm-usage" + browser_env["AGENT_BROWSER_ARGS"] = ( + "--no-sandbox,--disable-dev-shm-usage" ) # Use temp files for stdout/stderr instead of pipes. diff --git a/tools/budget_config.py b/tools/budget_config.py index 577e59442ee2..093188d5c75a 100644 --- a/tools/budget_config.py +++ b/tools/budget_config.py @@ -1,6 +1,5 @@ """Configurable budget constants for tool result persistence. -Overridable at the RL environment level via HermesAgentEnvConfig fields. Per-tool resolution: pinned > config overrides > registry > default. 
""" diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index e63b60047acf..3c29431484de 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -21,12 +21,14 @@ sys.path.insert(0, str(Path(__file__).parent.parent)) from cron.jobs import ( + AmbiguousJobReference, create_job, get_job, list_jobs, parse_schedule, pause_job, remove_job, + resolve_job_ref, resume_job, trigger_job, update_job, @@ -393,12 +395,32 @@ def cronjob( if not job_id: return tool_error(f"job_id is required for action '{normalized}'", success=False) - job = get_job(job_id) + try: + job = resolve_job_ref(job_id) + except AmbiguousJobReference as exc: + return json.dumps( + { + "success": False, + "error": str(exc), + "matches": [ + { + "id": m["id"], + "name": m.get("name"), + "schedule": m.get("schedule_display"), + "next_run_at": m.get("next_run_at"), + } + for m in exc.matches + ], + }, + indent=2, + ) if not job: return json.dumps( - {"success": False, "error": f"Job with ID '{job_id}' not found. Use cronjob(action='list') to inspect jobs."}, + {"success": False, "error": f"Job with ID or name '{job_id}' not found. Use cronjob(action='list') to inspect jobs."}, indent=2, ) + # Resolve to canonical ID (supports name-based lookup) + job_id = job["id"] if normalized == "remove": removed = remove_job(job_id) diff --git a/tools/file_operations.py b/tools/file_operations.py index 4b64421622fc..13d9314b9120 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -909,19 +909,29 @@ def write_file(self, path: str, content: str) -> WriteResult: if _is_write_denied(path): return WriteResult(error=f"Write denied: '{path}' is a protected system/credential file.") - # Capture pre-write content for lint-delta computation. Only do this - # when an in-process OR shell linter exists for this extension โ€” no - # point paying for the read otherwise. 
For in-process linters we - # pass the content directly; for shell linters the pre-state isn't - # useful (we'd have to re-write-read to lint the old version, which - # defeats the purpose), so we skip the capture and accept the naive - # "all errors" report. + # Capture pre-write content. Two consumers want it: + # + # 1. The lint-delta layer (for in-process linters like ast.parse + # and json.loads) needs the previous content to compute the + # set of NEW lint errors introduced by this write. + # 2. The LSP layer needs pre/post content to build a line-shift + # map โ€” pre-existing diagnostics below the edit point shift + # when lines are added/removed, and the shift map remaps + # baseline diagnostics into post-edit coordinates so the + # strict (range-aware) delta key matches. + # + # The set of extensions we capture pre_content for is therefore + # the UNION of in-process lint coverage and LSP coverage. For + # extensions outside both sets (binaries, opaque formats), + # skipping the read keeps the hot path fast. ext = os.path.splitext(path)[1].lower() pre_content: Optional[str] = None - if ext in LINTERS_INPROC: + want_pre = ext in LINTERS_INPROC or self._lsp_handles_extension(ext) + if want_pre: # Best-effort read; failure (file missing, permission) leaves - # pre_content as None which makes the delta step degrade - # gracefully to "report all errors". + # pre_content as None which makes both downstream consumers + # degrade gracefully (lint reports all errors; LSP skips the + # shift map). read_cmd = f"cat {self._escape_shell_arg(path)} 2>/dev/null" read_result = self._exec(read_cmd) if read_result.exit_code == 0 and read_result.stdout: @@ -966,11 +976,15 @@ def write_file(self, path: str, content: str) -> WriteResult: # Semantic diagnostics from the LSP layer โ€” separate channel. # Only fired when the syntax tier reported clean (no point asking - # an LSP for a file that won't even parse). Best-effort: - # ``""`` is returned for any failure path. 
+ # an LSP for a file that won't even parse). Pass pre/post + # content so the LSP layer can build a line-shift map and + # remap baseline diagnostics into post-edit coordinates. + # Best-effort: ``""`` is returned for any failure path. lsp_diagnostics: Optional[str] = None if lint_result.success or lint_result.skipped: - block = self._maybe_lsp_diagnostics(path) + block = self._maybe_lsp_diagnostics( + path, pre_content=pre_content, post_content=content + ) if block: lsp_diagnostics = block @@ -1295,6 +1309,29 @@ def _lsp_local_only(self) -> bool: return False return isinstance(env, LocalEnvironment) + def _lsp_handles_extension(self, ext: str) -> bool: + """Return True iff some registered LSP server claims this extension. + + Used to decide whether to capture pre-write content for the + line-shift map. Capturing is cheap (one ``cat`` on the host) + but pointless if no LSP would ever look at the file. + + Safe to call on remote backends โ€” the registry is purely + in-process metadata; we still gate the actual LSP path on + :meth:`_lsp_local_only`. + """ + if not ext: + return False + try: + from agent.lsp.servers import SERVERS + except Exception: # noqa: BLE001 + return False + ext_lower = ext.lower() + for srv in SERVERS: + if ext_lower in srv.extensions: + return True + return False + def _snapshot_lsp_baseline(self, path: str) -> None: """Capture pre-edit LSP diagnostics so the post-write delta is correct. @@ -1318,12 +1355,25 @@ def _snapshot_lsp_baseline(self, path: str) -> None: except Exception: # noqa: BLE001 pass - def _maybe_lsp_diagnostics(self, path: str) -> str: + def _maybe_lsp_diagnostics( + self, + path: str, + *, + pre_content: Optional[str] = None, + post_content: Optional[str] = None, + ) -> str: """Best-effort LSP semantic diagnostics for ``path``. Returns a formatted ```` block, or empty string when LSP is unavailable / disabled / produced no errors. 
+ When both ``pre_content`` and ``post_content`` are provided, + a line-shift map is built and passed to the LSPService so + baseline diagnostics are remapped into post-edit coordinates + before the set-difference. Without this, edits that delete + or insert lines surface every pre-existing diagnostic below + the edit point as "introduced by this edit". + Wraps everything in a try/except so a misbehaving LSP server can't break a write. This intentionally swallows all errors โ€” the calling tier already returned a clean syntax result, so @@ -1344,8 +1394,20 @@ def _maybe_lsp_diagnostics(self, path: str) -> str: return "" if svc is None or not svc.enabled_for(path): return "" + + # Build a line-shift map when we have both pre and post โ€” it + # remaps baseline diagnostics into post-edit coordinates so + # the strict (range-aware) delta key matches correctly. + line_shift = None + if pre_content is not None and post_content is not None and pre_content != post_content: + try: + from agent.lsp.range_shift import build_line_shift + line_shift = build_line_shift(pre_content, post_content) + except Exception: # noqa: BLE001 + line_shift = None + try: - diagnostics = svc.get_diagnostics_sync(path, delta=True) + diagnostics = svc.get_diagnostics_sync(path, delta=True, line_shift=line_shift) except Exception: # noqa: BLE001 return "" if not diagnostics: diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index c496166ec980..3d171f093c90 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -698,10 +698,7 @@ def image_generate_tool( raise ValueError("Prompt is required and must be a non-empty string") if not (fal_key_is_configured() or _resolve_managed_fal_gateway()): - message = "FAL_KEY environment variable not set" - if managed_nous_tools_enabled(): - message += " and managed FAL gateway is unavailable" - raise ValueError(message) + raise ValueError(_build_no_backend_setup_message()) aspect_lc = (aspect_ratio or 
DEFAULT_ASPECT_RATIO).lower().strip() if aspect_lc not in VALID_ASPECT_RATIOS: @@ -811,6 +808,42 @@ def check_fal_api_key() -> bool: return bool(fal_key_is_configured() or _resolve_managed_fal_gateway()) +def _build_no_backend_setup_message() -> str: + """Build an actionable error string when no FAL backend is reachable. + + Used by the in-tree FAL path. Mentions: + - FAL_KEY signup link + - managed-gateway status (if Nous tools are enabled) + - plugin alternative pointer (so users on a stale ``image_gen.provider`` + know the registry exists and how to inspect it) + """ + lines = ["Image generation is unavailable in this environment.", ""] + lines.append("Missing requirements:") + if managed_nous_tools_enabled(): + lines.append( + " - FAL_KEY is not set and the managed FAL gateway is unreachable" + ) + else: + lines.append(" - FAL_KEY environment variable is not set") + lines.append("") + lines.append("To enable image generation, do one of:") + lines.append( + " 1. Get a free API key at https://fal.ai and set " + "FAL_KEY= (then restart the session)" + ) + if managed_nous_tools_enabled(): + lines.append( + " 2. Sign in to a Nous account that has the managed FAL " + "gateway enabled (`hermes setup`)" + ) + lines.append( + " 3. Configure a different image_gen provider via `hermes tools` " + "โ†’ Image Generation (run `hermes plugins list` to see installed " + "backends)" + ) + return "\n".join(lines) + + def check_image_generation_requirements() -> bool: """True if any image gen backend is available. 
diff --git a/tools/lazy_deps.py b/tools/lazy_deps.py index 09347e8281c5..258a09ef6676 100644 --- a/tools/lazy_deps.py +++ b/tools/lazy_deps.py @@ -116,7 +116,12 @@ # โ”€โ”€โ”€ Messaging platforms (lazy-installable on demand) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ "platform.telegram": ("python-telegram-bot[webhooks]==22.6",), - "platform.discord": ("discord.py[voice]==2.7.1",), + # brotlicffi gives aiohttp a working 2-arg Decompressor.process() for + # Discord CDN's Brotli-encoded attachments. Without it, aiohttp falls + # back to google's `Brotli` package (1-arg API), and any .txt/.md/.doc + # uploaded to the Discord gateway fails to decode at att.read() with + # "Can not decode content-encoding: br" โ€” see #12511 / #15744. + "platform.discord": ("discord.py[voice]==2.7.1", "brotlicffi==1.2.0.1"), "platform.slack": ( "slack-bolt==1.27.0", "slack-sdk==3.40.1", diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index ee1843043dc5..c2668395e5dd 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -279,6 +279,11 @@ def _check_message_handler_support() -> bool: re.IGNORECASE, ) +# Pre-compiled pattern for ${VAR_NAME} style env-var interpolation. +# Supports any non-} characters in the variable name (hyphens, dots, etc.) +# so providers like MY-VAR or my.var work correctly. 
+_ENV_VAR_PATTERN = re.compile(r"\$\{([^}]+)\}") + # --------------------------------------------------------------------------- # Security helpers @@ -2104,7 +2109,7 @@ def _interpolate_env_vars(value): if isinstance(value, str): def _replace(m): return os.environ.get(m.group(1), m.group(0)) - return re.sub(r"\$\{([^}]+)\}", _replace, value) + return _ENV_VAR_PATTERN.sub(_replace, value) if isinstance(value, dict): return {k: _interpolate_env_vars(v) for k, v in value.items()} if isinstance(value, list): diff --git a/tools/process_registry.py b/tools/process_registry.py index 405abc04a3c0..184939adf755 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -826,6 +826,26 @@ def is_completion_consumed(self, session_id: str) -> bool: """Check if a completion notification was already consumed via wait/poll/log.""" return session_id in self._completion_consumed + def drain_notifications(self) -> "list[tuple[dict, str]]": + """Pop all pending notification events and return formatted pairs. + + Returns a list of (raw_event, formatted_text) tuples. + Skips completion events that were already consumed via wait/poll/log. + """ + results = [] + while not self.completion_queue.empty(): + try: + evt = self.completion_queue.get_nowait() + except Exception: + break + _evt_sid = evt.get("session_id", "") + if evt.get("type") == "completion" and self.is_completion_consumed(_evt_sid): + continue + text = format_process_notification(evt) + if text: + results.append((evt, text)) + return results + def get(self, session_id: str) -> Optional[ProcessSession]: """Get a session by ID (running or finished).""" with self._lock: @@ -1388,6 +1408,44 @@ def recover_from_checkpoint(self) -> int: process_registry = ProcessRegistry() +def format_process_notification(evt: dict) -> "str | None": + """Format a process notification event into a [IMPORTANT: ...] message. 
+ + Handles completion events (notify_on_complete), watch pattern matches, + and watch disabled events from the unified completion_queue. + """ + evt_type = evt.get("type", "completion") + _sid = evt.get("session_id", "unknown") + _cmd = evt.get("command", "unknown") + + if evt_type == "watch_disabled": + return f"[IMPORTANT: {evt.get('message', '')}]" + + if evt_type == "watch_match": + _pat = evt.get("pattern", "?") + _out = evt.get("output", "") + _sup = evt.get("suppressed", 0) + text = ( + f"[IMPORTANT: Background process {_sid} matched " + f"watch pattern \"{_pat}\".\n" + f"Command: {_cmd}\n" + f"Matched output:\n{_out}" + ) + if _sup: + text += f"\n({_sup} earlier matches were suppressed by rate limit)" + text += "]" + return text + + _exit = evt.get("exit_code", "?") + _out = evt.get("output", "") + return ( + f"[IMPORTANT: Background process {_sid} completed " + f"(exit code {_exit}).\n" + f"Command: {_cmd}\n" + f"Output:\n{_out}]" + ) + + # --------------------------------------------------------------------------- # Registry -- the "process" tool schema + handler # --------------------------------------------------------------------------- diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py deleted file mode 100644 index c7acb8012e13..000000000000 --- a/tools/rl_training_tool.py +++ /dev/null @@ -1,1396 +0,0 @@ -#!/usr/bin/env python3 -""" -RL Training Tools Module - -This module provides tools for running RL training through Tinker-Atropos. -Directly manages training processes without requiring a separate API server. 
- -Features: -- Environment discovery (AST-based scanning for BaseEnv subclasses) -- Configuration management with locked infrastructure settings -- Training run lifecycle via subprocess management -- WandB metrics monitoring - -Required environment variables: -- TINKER_API_KEY: API key for Tinker service -- WANDB_API_KEY: API key for Weights & Biases metrics - -Usage: - from tools.rl_training_tool import ( - rl_list_environments, - rl_select_environment, - rl_get_current_config, - rl_edit_config, - rl_start_training, - rl_check_status, - rl_stop_training, - rl_get_results, - ) -""" - -import ast -import asyncio -import importlib.util -import json -import os -import subprocess -import sys -import time -import uuid -import logging -from datetime import datetime -import yaml -from dataclasses import dataclass -from pathlib import Path -from typing import Any, Dict, List, Optional - -from hermes_constants import get_hermes_home - -logger = logging.getLogger(__name__) - -# ============================================================================ -# Path Configuration -# ============================================================================ - -# Path to tinker-atropos submodule (relative to hermes-agent root) -HERMES_ROOT = Path(__file__).parent.parent -TINKER_ATROPOS_ROOT = HERMES_ROOT / "tinker-atropos" -ENVIRONMENTS_DIR = TINKER_ATROPOS_ROOT / "tinker_atropos" / "environments" -CONFIGS_DIR = TINKER_ATROPOS_ROOT / "configs" -LOGS_DIR = get_hermes_home() / "logs" / "rl_training" - -def _ensure_logs_dir(): - """Lazily create logs directory on first use (avoid side effects at import time).""" - if TINKER_ATROPOS_ROOT.exists(): - LOGS_DIR.mkdir(exist_ok=True) - -# ============================================================================ -# Locked Configuration (Infrastructure Settings) -# ============================================================================ - -# These fields cannot be changed by the model - they're tuned for our infrastructure 
-LOCKED_FIELDS = { - "env": { - "tokenizer_name": "Qwen/Qwen3-8B", - "rollout_server_url": "http://localhost:8000", - "use_wandb": True, - "max_token_length": 8192, - "max_num_workers": 2048, - "worker_timeout": 3600, - "total_steps": 2500, - "steps_per_eval": 25, - "max_batches_offpolicy": 3, - "inference_weight": 1.0, - "eval_limit_ratio": 0.1, - }, - "openai": [ - { - "model_name": "Qwen/Qwen3-8B", - "base_url": "http://localhost:8001/v1", - "api_key": "x", - "weight": 1.0, - "num_requests_for_eval": 256, - "timeout": 3600, - "server_type": "sglang", # Tinker uses sglang for actual training - } - ], - "tinker": { - "lora_rank": 32, - "learning_rate": 0.00004, - "max_token_trainer_length": 9000, - "checkpoint_dir": "./temp/", - "save_checkpoint_interval": 25, - }, - "slurm": False, - "testing": False, -} - -LOCKED_FIELD_NAMES = set(LOCKED_FIELDS.get("env", {}).keys()) - - -# ============================================================================ -# State Management -# ============================================================================ - -@dataclass -class EnvironmentInfo: - """Information about a discovered environment.""" - name: str - class_name: str - file_path: str - description: str = "" - config_class: str = "BaseEnvConfig" - - -@dataclass -class RunState: - """State for a training run.""" - run_id: str - environment: str - config: Dict[str, Any] - status: str = "pending" # pending, starting, running, stopping, stopped, completed, failed - error_message: str = "" - wandb_project: str = "" - wandb_run_name: str = "" - start_time: float = 0.0 - # Process handles - api_process: Optional[subprocess.Popen] = None - trainer_process: Optional[subprocess.Popen] = None - env_process: Optional[subprocess.Popen] = None - - -# Global state -_environments: List[EnvironmentInfo] = [] -_current_env: Optional[str] = None -_current_config: Dict[str, Any] = {} -_env_config_cache: Dict[str, Dict[str, Dict[str, Any]]] = {} -_active_runs: Dict[str, RunState] = {} 
-_last_status_check: Dict[str, float] = {} - -# Rate limiting for status checks (30 minutes) -MIN_STATUS_CHECK_INTERVAL = 30 * 60 - - -# ============================================================================ -# Environment Discovery -# ============================================================================ - -def _scan_environments() -> List[EnvironmentInfo]: - """ - Scan the environments directory for BaseEnv subclasses using AST. - """ - environments = [] - - if not ENVIRONMENTS_DIR.exists(): - return environments - - for py_file in ENVIRONMENTS_DIR.glob("*.py"): - if py_file.name.startswith("_"): - continue - - try: - with open(py_file, "r", encoding="utf-8") as f: - tree = ast.parse(f.read()) - - for node in ast.walk(tree): - if isinstance(node, ast.ClassDef): - # Check if class has BaseEnv as base - for base in node.bases: - base_name = "" - if isinstance(base, ast.Name): - base_name = base.id - elif isinstance(base, ast.Attribute): - base_name = base.attr - - if base_name == "BaseEnv": - # Extract name from class attribute if present - env_name = py_file.stem - description = "" - config_class = "BaseEnvConfig" - - for item in node.body: - if isinstance(item, ast.Assign): - for target in item.targets: - if isinstance(target, ast.Name): - if target.id == "name" and isinstance(item.value, ast.Constant): - env_name = item.value.value - elif target.id == "env_config_cls" and isinstance(item.value, ast.Name): - config_class = item.value.id - - # Get docstring - if isinstance(item, ast.Expr) and isinstance(item.value, ast.Constant): - if isinstance(item.value.value, str) and not description: - description = item.value.value.split("\n")[0].strip() - - environments.append(EnvironmentInfo( - name=env_name, - class_name=node.name, - file_path=str(py_file), - description=description or f"Environment from {py_file.name}", - config_class=config_class, - )) - break - except Exception as e: - logger.warning("Could not parse %s: %s", py_file, e) - - return 
environments - - -def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]: - """ - Dynamically import an environment and extract its config fields. - - Uses config_init() to get the actual config class, with fallback to - directly importing BaseEnvConfig if config_init fails. - """ - try: - # Load the environment module - spec = importlib.util.spec_from_file_location("env_module", env_file_path) - module = importlib.util.module_from_spec(spec) - sys.modules["env_module"] = module - spec.loader.exec_module(module) - - # Find the BaseEnv subclass - env_class = None - for name, obj in vars(module).items(): - if isinstance(obj, type) and name != "BaseEnv": - if hasattr(obj, "config_init") and callable(getattr(obj, "config_init")): - env_class = obj - break - - if not env_class: - return {} - - # Try calling config_init to get the actual config class - config_class = None - try: - env_config, server_configs = env_class.config_init() - config_class = type(env_config) - except Exception as config_error: - # Fallback: try to import BaseEnvConfig directly from atroposlib - logger.info("config_init failed (%s), using BaseEnvConfig defaults", config_error) - try: - from atroposlib.envs.base import BaseEnvConfig - config_class = BaseEnvConfig - except ImportError: - return {} - - if not config_class: - return {} - - # Helper to make values JSON-serializable (handle enums, etc.) 
- def make_serializable(val): - if val is None: - return None - if hasattr(val, 'value'): # Enum - return val.value - if hasattr(val, 'name') and hasattr(val, '__class__') and 'Enum' in str(type(val)): - return val.name - return val - - # Extract fields from the Pydantic model - fields = {} - for field_name, field_info in config_class.model_fields.items(): - field_type = field_info.annotation - default = make_serializable(field_info.default) - description = field_info.description or "" - - is_locked = field_name in LOCKED_FIELD_NAMES - - # Convert type to string - type_name = getattr(field_type, "__name__", str(field_type)) - if hasattr(field_type, "__origin__"): - type_name = str(field_type) - - locked_value = LOCKED_FIELDS.get("env", {}).get(field_name, default) - current_value = make_serializable(locked_value) if is_locked else default - - fields[field_name] = { - "type": type_name, - "default": default, - "description": description, - "locked": is_locked, - "current_value": current_value, - } - - return fields - - except Exception as e: - logger.warning("Could not introspect environment config: %s", e) - return {} - - -def _initialize_environments(): - """Initialize environment list on first use.""" - global _environments - if not _environments: - _environments = _scan_environments() - - -# ============================================================================ -# Subprocess Management -# ============================================================================ - -async def _spawn_training_run(run_state: RunState, config_path: Path): - """ - Spawn the three processes needed for training: - 1. run-api (Atropos API server) - 2. launch_training.py (Tinker trainer + inference server) - 3. 
environment.py serve (the Atropos environment) - """ - run_id = run_state.run_id - - _ensure_logs_dir() - - # Log file paths - api_log = LOGS_DIR / f"api_{run_id}.log" - trainer_log = LOGS_DIR / f"trainer_{run_id}.log" - env_log = LOGS_DIR / f"env_{run_id}.log" - - try: - # Step 1: Start the Atropos API server (run-api) - logger.info("[%s] Starting Atropos API server (run-api)...", run_id) - - # File must stay open while the subprocess runs; we store the handle - # on run_state so _stop_training_run() can close it when done. - api_log_file = open(api_log, "w", encoding="utf-8") # closed by _stop_training_run - run_state.api_log_file = api_log_file - run_state.api_process = subprocess.Popen( - ["run-api"], - stdout=api_log_file, - stderr=subprocess.STDOUT, - cwd=str(TINKER_ATROPOS_ROOT), - ) - - # Wait for API to start - await asyncio.sleep(5) - - if run_state.api_process.poll() is not None: - run_state.status = "failed" - run_state.error_message = f"API server exited with code {run_state.api_process.returncode}. 
Check {api_log}" - _stop_training_run(run_state) - return - - logger.info("[%s] Atropos API server started", run_id) - - # Step 2: Start the Tinker trainer - logger.info("[%s] Starting Tinker trainer: launch_training.py --config %s", run_id, config_path) - - trainer_log_file = open(trainer_log, "w", encoding="utf-8") # closed by _stop_training_run - run_state.trainer_log_file = trainer_log_file - run_state.trainer_process = subprocess.Popen( - [sys.executable, "launch_training.py", "--config", str(config_path)], - stdout=trainer_log_file, - stderr=subprocess.STDOUT, - cwd=str(TINKER_ATROPOS_ROOT), - env={**os.environ, "TINKER_API_KEY": os.getenv("TINKER_API_KEY", "")}, - ) - - # Wait for trainer to initialize (it starts FastAPI inference server on 8001) - logger.info("[%s] Waiting 30 seconds for trainer to initialize...", run_id) - await asyncio.sleep(30) - - if run_state.trainer_process.poll() is not None: - run_state.status = "failed" - run_state.error_message = f"Trainer exited with code {run_state.trainer_process.returncode}. 
Check {trainer_log}" - _stop_training_run(run_state) - return - - logger.info("[%s] Trainer started, inference server on port 8001", run_id) - - # Step 3: Start the environment - logger.info("[%s] Waiting 90 more seconds before starting environment...", run_id) - await asyncio.sleep(90) - - # Find the environment file - env_info = None - for env in _environments: - if env.name == run_state.environment: - env_info = env - break - - if not env_info: - run_state.status = "failed" - run_state.error_message = f"Environment '{run_state.environment}' not found" - _stop_training_run(run_state) - return - - logger.info("[%s] Starting environment: %s serve", run_id, env_info.file_path) - - env_log_file = open(env_log, "w", encoding="utf-8") # closed by _stop_training_run - run_state.env_log_file = env_log_file - run_state.env_process = subprocess.Popen( - [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)], - stdout=env_log_file, - stderr=subprocess.STDOUT, - cwd=str(TINKER_ATROPOS_ROOT), - ) - - # Wait for environment to connect - await asyncio.sleep(10) - - if run_state.env_process.poll() is not None: - run_state.status = "failed" - run_state.error_message = f"Environment exited with code {run_state.env_process.returncode}. 
Check {env_log}" - _stop_training_run(run_state) - return - - run_state.status = "running" - run_state.start_time = time.time() - logger.info("[%s] Training run started successfully!", run_id) - - # Start background monitoring - asyncio.create_task(_monitor_training_run(run_state)) - - except Exception as e: - run_state.status = "failed" - run_state.error_message = str(e) - _stop_training_run(run_state) - - -async def _monitor_training_run(run_state: RunState): - """Background task to monitor a training run.""" - while run_state.status == "running": - await asyncio.sleep(30) # Check every 30 seconds - - # Check if any process has died - if run_state.env_process and run_state.env_process.poll() is not None: - exit_code = run_state.env_process.returncode - if exit_code == 0: - run_state.status = "completed" - else: - run_state.status = "failed" - run_state.error_message = f"Environment process exited with code {exit_code}" - _stop_training_run(run_state) - break - - if run_state.trainer_process and run_state.trainer_process.poll() is not None: - exit_code = run_state.trainer_process.returncode - if exit_code == 0: - run_state.status = "completed" - else: - run_state.status = "failed" - run_state.error_message = f"Trainer process exited with code {exit_code}" - _stop_training_run(run_state) - break - - if run_state.api_process and run_state.api_process.poll() is not None: - run_state.status = "failed" - run_state.error_message = "API server exited unexpectedly" - _stop_training_run(run_state) - break - - -def _stop_training_run(run_state: RunState): - """Stop all processes for a training run.""" - # Stop in reverse order: env -> trainer -> api - if run_state.env_process and run_state.env_process.poll() is None: - logger.info("[%s] Stopping environment process...", run_state.run_id) - run_state.env_process.terminate() - try: - run_state.env_process.wait(timeout=10) - except subprocess.TimeoutExpired: - run_state.env_process.kill() - - if run_state.trainer_process and 
run_state.trainer_process.poll() is None: - logger.info("[%s] Stopping trainer process...", run_state.run_id) - run_state.trainer_process.terminate() - try: - run_state.trainer_process.wait(timeout=10) - except subprocess.TimeoutExpired: - run_state.trainer_process.kill() - - if run_state.api_process and run_state.api_process.poll() is None: - logger.info("[%s] Stopping API server...", run_state.run_id) - run_state.api_process.terminate() - try: - run_state.api_process.wait(timeout=10) - except subprocess.TimeoutExpired: - run_state.api_process.kill() - - if run_state.status == "running": - run_state.status = "stopped" - - # Close log file handles that were opened for subprocess stdout. - for attr in ("env_log_file", "trainer_log_file", "api_log_file"): - fh = getattr(run_state, attr, None) - if fh is not None: - try: - fh.close() - except Exception: - pass - setattr(run_state, attr, None) - - -# ============================================================================ -# Environment Discovery Tools -# ============================================================================ - -async def rl_list_environments() -> str: - """ - List all available RL environments. - - Scans tinker-atropos/tinker_atropos/environments/ for Python files - containing classes that inherit from BaseEnv. - - Returns information about each environment including: - - name: Environment identifier - - class_name: Python class name - - file_path: Path to the environment file - - description: Brief description if available - - TIP: To create or modify RL environments: - 1. Use terminal/file tools to inspect existing environments - 2. Study how they load datasets, define verifiers, and structure rewards - 3. Inspect HuggingFace datasets to understand data formats - 4. 
Copy an existing environment as a template - - Returns: - JSON string with list of environments - """ - _initialize_environments() - - response = { - "environments": [ - { - "name": env.name, - "class_name": env.class_name, - "file_path": env.file_path, - "description": env.description, - } - for env in _environments - ], - "count": len(_environments), - "tips": [ - "Use rl_select_environment(name) to select an environment", - "Read the file_path with file tools to understand how each environment works", - "Look for load_dataset(), score_answer(), get_next_item() methods", - ] - } - - return json.dumps(response, indent=2) - - -async def rl_select_environment(name: str) -> str: - """ - Select an RL environment for training. - - This loads the environment's configuration fields into memory. - After selecting, use rl_get_current_config() to see all configurable options - and rl_edit_config() to modify specific fields. - - Args: - name: Name of the environment to select (from rl_list_environments) - - Returns: - JSON string with selection result, file path, and configurable field count - - TIP: Read the returned file_path to understand how the environment works. 
- """ - global _current_env, _current_config - - _initialize_environments() - - env_info = None - for env in _environments: - if env.name == name: - env_info = env - break - - if not env_info: - return json.dumps({ - "error": f"Environment '{name}' not found", - "available": [e.name for e in _environments], - }, indent=2) - - _current_env = name - - # Dynamically discover config fields - config_fields = _get_env_config_fields(env_info.file_path) - _env_config_cache[name] = config_fields - - # Initialize current config with defaults for non-locked fields - _current_config = {} - for field_name, field_info in config_fields.items(): - if not field_info.get("locked", False): - _current_config[field_name] = field_info.get("default") - - # Auto-set wandb_name to "{env_name}-DATETIME" to avoid overlaps - timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") - _current_config["wandb_name"] = f"{name}-{timestamp}" - - return json.dumps({ - "message": f"Selected environment: {name}", - "environment": name, - "file_path": env_info.file_path, - }, indent=2) - - -# ============================================================================ -# Configuration Tools -# ============================================================================ - -async def rl_get_current_config() -> str: - """ - Get the current environment configuration. - - Returns all configurable fields for the selected environment. - Each environment may have different configuration options. - - Fields are divided into: - - configurable_fields: Can be changed with rl_edit_config() - - locked_fields: Infrastructure settings that cannot be changed - - Returns: - JSON string with configurable and locked fields - """ - if not _current_env: - return json.dumps({ - "error": "No environment selected. 
Use rl_select_environment(name) first.", - }, indent=2) - - config_fields = _env_config_cache.get(_current_env, {}) - - configurable = [] - locked = [] - - for field_name, field_info in config_fields.items(): - field_data = { - "name": field_name, - "type": field_info.get("type", "unknown"), - "default": field_info.get("default"), - "description": field_info.get("description", ""), - "current_value": _current_config.get(field_name, field_info.get("default")), - } - - if field_info.get("locked", False): - field_data["locked_value"] = LOCKED_FIELDS.get("env", {}).get(field_name) - locked.append(field_data) - else: - configurable.append(field_data) - - return json.dumps({ - "environment": _current_env, - "configurable_fields": configurable, - "locked_fields": locked, - "tip": "Use rl_edit_config(field, value) to change any configurable field.", - }, indent=2) - - -async def rl_edit_config(field: str, value: Any) -> str: - """ - Update a configuration field. - - Use rl_get_current_config() first to see available fields for the - selected environment. Each environment has different options. - - Locked fields (infrastructure settings) cannot be changed. - - Args: - field: Name of the field to update (from rl_get_current_config) - value: New value for the field - - Returns: - JSON string with updated config or error message - """ - if not _current_env: - return json.dumps({ - "error": "No environment selected. 
Use rl_select_environment(name) first.", - }, indent=2) - - config_fields = _env_config_cache.get(_current_env, {}) - - if field not in config_fields: - return json.dumps({ - "error": f"Unknown field '{field}'", - "available_fields": list(config_fields.keys()), - }, indent=2) - - field_info = config_fields[field] - if field_info.get("locked", False): - return json.dumps({ - "error": f"Field '{field}' is locked and cannot be changed", - "locked_value": LOCKED_FIELDS.get("env", {}).get(field), - }, indent=2) - - _current_config[field] = value - - return json.dumps({ - "message": f"Updated {field} = {value}", - "field": field, - "value": value, - "config": _current_config, - }, indent=2) - - -# ============================================================================ -# Training Management Tools -# ============================================================================ - -async def rl_start_training() -> str: - """ - Start a new RL training run with the current environment and config. - - Requires an environment to be selected first using rl_select_environment(). - Use rl_edit_config() to adjust configuration before starting. - - This spawns three processes: - 1. run-api (Atropos trajectory API) - 2. launch_training.py (Tinker trainer + inference server) - 3. environment.py serve (the selected environment) - - WARNING: Training runs take hours. Use rl_check_status() to monitor - progress (recommended: check every 30 minutes at most). - - Returns: - JSON string with run_id and initial status - """ - if not _current_env: - return json.dumps({ - "error": "No environment selected. Use rl_select_environment(name) first.", - }, indent=2) - - # Check API keys - if not os.getenv("TINKER_API_KEY"): - return json.dumps({ - "error": "TINKER_API_KEY not set. 
Add it to ~/.hermes/.env", - }, indent=2) - - # Find environment file - env_info = None - for env in _environments: - if env.name == _current_env: - env_info = env - break - - if not env_info or not Path(env_info.file_path).exists(): - return json.dumps({ - "error": f"Environment file not found for '{_current_env}'", - }, indent=2) - - # Generate run ID - run_id = str(uuid.uuid4())[:8] - - # Create config YAML - CONFIGS_DIR.mkdir(exist_ok=True) - config_path = CONFIGS_DIR / f"run_{run_id}.yaml" - - # Start with locked config as base - import copy - run_config = copy.deepcopy(LOCKED_FIELDS) - - if "env" not in run_config: - run_config["env"] = {} - - # Apply configurable fields - for field_name, value in _current_config.items(): - if value is not None and value != "": - run_config["env"][field_name] = value - - # Set WandB settings - wandb_project = _current_config.get("wandb_project", "atropos-tinker") - if "tinker" not in run_config: - run_config["tinker"] = {} - run_config["tinker"]["wandb_project"] = wandb_project - run_config["tinker"]["wandb_run_name"] = f"{_current_env}-{run_id}" - - if "wandb_name" in _current_config and _current_config["wandb_name"]: - run_config["env"]["wandb_name"] = _current_config["wandb_name"] - - with open(config_path, "w", encoding="utf-8") as f: - yaml.dump(run_config, f, default_flow_style=False) - - # Create run state - run_state = RunState( - run_id=run_id, - environment=_current_env, - config=_current_config.copy(), - status="starting", - wandb_project=wandb_project, - wandb_run_name=f"{_current_env}-{run_id}", - ) - - _active_runs[run_id] = run_state - - # Start training in background - asyncio.create_task(_spawn_training_run(run_state, config_path)) - - return json.dumps({ - "run_id": run_id, - "status": "starting", - "environment": _current_env, - "config": _current_config, - "wandb_project": wandb_project, - "wandb_run_name": f"{_current_env}-{run_id}", - "config_path": str(config_path), - "logs": { - "api": str(LOGS_DIR / 
f"api_{run_id}.log"), - "trainer": str(LOGS_DIR / f"trainer_{run_id}.log"), - "env": str(LOGS_DIR / f"env_{run_id}.log"), - }, - "message": "Training starting. Use rl_check_status(run_id) to monitor (recommended: every 30 minutes).", - }, indent=2) - - -async def rl_check_status(run_id: str) -> str: - """ - Get status and metrics for a training run. - - RATE LIMITED: For long-running training, this function enforces a - minimum 30-minute interval between checks for the same run_id. - - Args: - run_id: The run ID returned by rl_start_training() - - Returns: - JSON string with run status and metrics - """ - # Check rate limiting - now = time.time() - if run_id in _last_status_check: - elapsed = now - _last_status_check[run_id] - if elapsed < MIN_STATUS_CHECK_INTERVAL: - remaining = MIN_STATUS_CHECK_INTERVAL - elapsed - return json.dumps({ - "rate_limited": True, - "run_id": run_id, - "message": f"Rate limited. Next check available in {remaining/60:.0f} minutes.", - "next_check_in_seconds": remaining, - }, indent=2) - - _last_status_check[run_id] = now - - if run_id not in _active_runs: - return json.dumps({ - "error": f"Run '{run_id}' not found", - "active_runs": list(_active_runs.keys()), - }, indent=2) - - run_state = _active_runs[run_id] - - # Check process status - processes = { - "api": run_state.api_process.poll() if run_state.api_process else None, - "trainer": run_state.trainer_process.poll() if run_state.trainer_process else None, - "env": run_state.env_process.poll() if run_state.env_process else None, - } - - running_time = time.time() - run_state.start_time if run_state.start_time else 0 - - result = { - "run_id": run_id, - "status": run_state.status, - "environment": run_state.environment, - "running_time_minutes": running_time / 60, - "processes": { - name: "running" if code is None else f"exited ({code})" - for name, code in processes.items() - }, - "wandb_project": run_state.wandb_project, - "wandb_run_name": run_state.wandb_run_name, - "logs": { - 
"api": str(LOGS_DIR / f"api_{run_id}.log"), - "trainer": str(LOGS_DIR / f"trainer_{run_id}.log"), - "env": str(LOGS_DIR / f"env_{run_id}.log"), - }, - } - - if run_state.error_message: - result["error"] = run_state.error_message - - # Try to get WandB metrics if available - try: - import wandb - api = wandb.Api() - runs = api.runs( - f"{os.getenv('WANDB_ENTITY', 'nousresearch')}/{run_state.wandb_project}", - filters={"display_name": run_state.wandb_run_name} - ) - if runs: - wandb_run = runs[0] - result["wandb_url"] = wandb_run.url - result["metrics"] = { - "step": wandb_run.summary.get("_step", 0), - "reward_mean": wandb_run.summary.get("train/reward_mean"), - "percent_correct": wandb_run.summary.get("train/percent_correct"), - "eval_percent_correct": wandb_run.summary.get("eval/percent_correct"), - } - except Exception as e: - result["wandb_error"] = str(e) - - return json.dumps(result, indent=2) - - -async def rl_stop_training(run_id: str) -> str: - """ - Stop a running training job. - - Args: - run_id: The run ID to stop - - Returns: - JSON string with stop confirmation - """ - if run_id not in _active_runs: - return json.dumps({ - "error": f"Run '{run_id}' not found", - "active_runs": list(_active_runs.keys()), - }, indent=2) - - run_state = _active_runs[run_id] - - if run_state.status not in {"running", "starting"}: - return json.dumps({ - "message": f"Run '{run_id}' is not running (status: {run_state.status})", - }, indent=2) - - _stop_training_run(run_state) - - return json.dumps({ - "message": f"Stopped training run '{run_id}'", - "run_id": run_id, - "status": run_state.status, - }, indent=2) - - -async def rl_get_results(run_id: str) -> str: - """ - Get final results and metrics for a training run. 
- - Args: - run_id: The run ID to get results for - - Returns: - JSON string with final results - """ - if run_id not in _active_runs: - return json.dumps({ - "error": f"Run '{run_id}' not found", - }, indent=2) - - run_state = _active_runs[run_id] - - result = { - "run_id": run_id, - "status": run_state.status, - "environment": run_state.environment, - "wandb_project": run_state.wandb_project, - "wandb_run_name": run_state.wandb_run_name, - } - - # Get WandB metrics - try: - import wandb - api = wandb.Api() - runs = api.runs( - f"{os.getenv('WANDB_ENTITY', 'nousresearch')}/{run_state.wandb_project}", - filters={"display_name": run_state.wandb_run_name} - ) - if runs: - wandb_run = runs[0] - result["wandb_url"] = wandb_run.url - result["final_metrics"] = dict(wandb_run.summary) - result["history"] = [dict(row) for row in wandb_run.history(samples=10)] - except Exception as e: - result["wandb_error"] = str(e) - - return json.dumps(result, indent=2) - - -async def rl_list_runs() -> str: - """ - List all training runs (active and completed). 
- - Returns: - JSON string with list of runs and their status - """ - runs = [] - for run_id, run_state in _active_runs.items(): - runs.append({ - "run_id": run_id, - "environment": run_state.environment, - "status": run_state.status, - "wandb_run_name": run_state.wandb_run_name, - }) - - return json.dumps({ - "runs": runs, - "count": len(runs), - }, indent=2) - - -# ============================================================================ -# Inference Testing (via Atropos `process` mode with OpenRouter) -# ============================================================================ - -# Test models at different scales for robustness testing -# These are cheap, capable models on OpenRouter for testing parsing/scoring -TEST_MODELS = [ - {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"}, - {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"}, - {"id": "minimax/minimax-m2.7", "name": "MiniMax M2.7", "scale": "large"}, -] - -# Default test parameters - quick but representative -DEFAULT_NUM_STEPS = 3 # Number of steps (items) to test -DEFAULT_GROUP_SIZE = 16 # Completions per item (like training) - - -async def rl_test_inference( - num_steps: int = DEFAULT_NUM_STEPS, - group_size: int = DEFAULT_GROUP_SIZE, - models: Optional[List[str]] = None, -) -> str: - """ - Quick inference test for any environment using Atropos's `process` mode. - - Runs a few steps of inference + scoring to validate: - - Environment loads correctly - - Prompt construction works - - Inference parsing is robust (tested with multiple model scales) - - Verifier/scoring logic works - - Default: 3 steps ร— 16 completions = 48 total rollouts per model. - Tests 3 models = 144 total rollouts. Quick sanity check. 
- - Test models (varying intelligence levels for robustness): - - qwen/qwen3-8b (small) - - zhipu-ai/glm-4-flash (medium) - - minimax/minimax-m1 (large) - - Args: - num_steps: Steps to run (default: 3, max recommended for testing) - group_size: Completions per step (default: 16, like training) - models: Optional model IDs to test. If None, uses all 3 test models. - - Returns: - JSON with results per model: steps_tested, accuracy, scores - """ - if not _current_env: - return json.dumps({ - "error": "No environment selected. Use rl_select_environment(name) first.", - }, indent=2) - - api_key = os.getenv("OPENROUTER_API_KEY") - if not api_key: - return json.dumps({ - "error": "OPENROUTER_API_KEY not set. Required for inference testing.", - }, indent=2) - - # Find environment info - env_info = None - for env in _environments: - if env.name == _current_env: - env_info = env - break - - if not env_info: - return json.dumps({ - "error": f"Environment '{_current_env}' not found", - }, indent=2) - - # Determine which models to test - if models: - test_models = [m for m in TEST_MODELS if m["id"] in models] - if not test_models: - test_models = [{"id": m, "name": m, "scale": "custom"} for m in models] - else: - test_models = TEST_MODELS - - # Calculate total rollouts for logging - total_rollouts_per_model = num_steps * group_size - total_rollouts = total_rollouts_per_model * len(test_models) - - results = { - "environment": _current_env, - "environment_file": env_info.file_path, - "test_config": { - "num_steps": num_steps, - "group_size": group_size, - "rollouts_per_model": total_rollouts_per_model, - "total_rollouts": total_rollouts, - }, - "models_tested": [], - } - - # Create output directory for test results - _ensure_logs_dir() - test_output_dir = LOGS_DIR / "inference_tests" - test_output_dir.mkdir(exist_ok=True) - - for model_info in test_models: - model_id = model_info["id"] - model_safe_name = model_id.replace("/", "_") - - print(f"\n{'='*60}") - print(f"Testing with 
{model_info['name']} ({model_id})") - print(f"{'='*60}") - - # Output file for this test run - output_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.jsonl" - - # Generate unique run ID for wandb - test_run_id = str(uuid.uuid4())[:8] - wandb_run_name = f"test_inference_RSIAgent_{_current_env}_{test_run_id}" - - # Build the process command using Atropos's built-in CLI - # This runs the environment's actual code with OpenRouter as the inference backend - # We pass our locked settings + test-specific overrides via CLI args - cmd = [ - sys.executable, env_info.file_path, "process", - # Test-specific overrides - "--env.total_steps", str(num_steps), - "--env.group_size", str(group_size), - "--env.use_wandb", "true", # Enable wandb for test tracking - "--env.wandb_name", wandb_run_name, - "--env.data_path_to_save_groups", str(output_file), - # Use locked settings from our config - "--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"], - "--env.max_token_length", str(LOCKED_FIELDS["env"]["max_token_length"]), - "--env.max_num_workers", str(LOCKED_FIELDS["env"]["max_num_workers"]), - "--env.max_batches_offpolicy", str(LOCKED_FIELDS["env"]["max_batches_offpolicy"]), - # OpenRouter config for inference testing - # IMPORTANT: Use server_type=openai for OpenRouter (not sglang) - # sglang is only for actual training with Tinker's inference server - "--openai.base_url", "https://openrouter.ai/api/v1", - "--openai.api_key", api_key, - "--openai.model_name", model_id, - "--openai.server_type", "openai", # OpenRouter is OpenAI-compatible - "--openai.health_check", "false", # OpenRouter doesn't have health endpoint - ] - - # Debug: Print the full command - cmd_str = " ".join(str(c) for c in cmd) - # Hide API key in printed output - cmd_display = cmd_str.replace(api_key, "***API_KEY***") - print(f"Command: {cmd_display}") - print(f"Working dir: {TINKER_ATROPOS_ROOT}") - print(f"WandB run: {wandb_run_name}") - print(f" {num_steps} steps ร— {group_size} 
completions = {total_rollouts_per_model} rollouts") - - model_results = { - "model": model_id, - "name": model_info["name"], - "scale": model_info["scale"], - "wandb_run": wandb_run_name, - "output_file": str(output_file), - "steps": [], - "steps_tested": 0, - "total_completions": 0, - "correct_completions": 0, - } - - try: - # Run the process command with real-time output streaming - process = await asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(TINKER_ATROPOS_ROOT), - ) - - # Stream output in real-time while collecting for logs - stdout_lines = [] - stderr_lines = [] - log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log" - - async def read_stream(stream, lines_list, prefix=""): - """Read stream line by line and print in real-time.""" - while True: - line = await stream.readline() - if not line: - break - decoded = line.decode().rstrip() - lines_list.append(decoded) - # Print progress-related lines in real-time - if any(kw in decoded.lower() for kw in ['processing', 'group', 'step', 'progress', '%', 'completed']): - print(f" {prefix}{decoded}") - - # Read both streams concurrently with timeout - try: - await asyncio.wait_for( - asyncio.gather( - read_stream(process.stdout, stdout_lines, "๐Ÿ“Š "), - read_stream(process.stderr, stderr_lines, "โš ๏ธ "), - ), - timeout=600, # 10 minute timeout per model - ) - except asyncio.TimeoutError: - process.kill() - raise - - await process.wait() - - # Combine output for logging - stdout_text = "\n".join(stdout_lines) - stderr_text = "\n".join(stderr_lines) - - # Write logs to files for inspection outside CLI - with open(log_file, "w", encoding="utf-8") as f: - f.write(f"Command: {cmd_display}\n") - f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n") - f.write(f"Return code: {process.returncode}\n") - f.write(f"\n{'='*60}\n") - f.write(f"STDOUT:\n{'='*60}\n") - f.write(stdout_text or "(empty)\n") - f.write(f"\n{'='*60}\n") - 
f.write(f"STDERR:\n{'='*60}\n") - f.write(stderr_text or "(empty)\n") - - print(f" Log file: {log_file}") - - if process.returncode != 0: - model_results["error"] = f"Process exited with code {process.returncode}" - model_results["stderr"] = stderr_text[-1000:] - model_results["stdout"] = stdout_text[-1000:] - model_results["log_file"] = str(log_file) - print(f"\n โŒ Error: {model_results['error']}") - # Print last few lines of stderr for debugging - if stderr_lines: - print(" Last errors:") - for line in stderr_lines[-5:]: - print(f" {line}") - else: - print("\n โœ… Process completed successfully") - print(f" Output file: {output_file}") - print(f" File exists: {output_file.exists()}") - - # Parse the output JSONL file - if output_file.exists(): - # Read JSONL file (one JSON object per line = one step) - with open(output_file, "r", encoding="utf-8") as f: - for line in f: - line = line.strip() - if not line: - continue - try: - item = json.loads(line) - scores = item.get("scores", []) - model_results["steps_tested"] += 1 - model_results["total_completions"] += len(scores) - correct = sum(1 for s in scores if s > 0) - model_results["correct_completions"] += correct - - model_results["steps"].append({ - "step": model_results["steps_tested"], - "completions": len(scores), - "correct": correct, - "scores": scores, - }) - except json.JSONDecodeError: - continue - - print(f" Completed {model_results['steps_tested']} steps") - else: - model_results["error"] = f"Output file not created: {output_file}" - - except asyncio.TimeoutError: - model_results["error"] = "Process timed out after 10 minutes" - print(" Timeout!") - except Exception as e: - model_results["error"] = str(e) - print(f" Error: {e}") - - # Calculate stats - if model_results["total_completions"] > 0: - model_results["accuracy"] = round( - model_results["correct_completions"] / model_results["total_completions"], 3 - ) - else: - model_results["accuracy"] = 0 - - if model_results["steps_tested"] > 0: - 
steps_with_correct = sum(1 for s in model_results["steps"] if s.get("correct", 0) > 0) - model_results["steps_with_correct"] = steps_with_correct - model_results["step_success_rate"] = round( - steps_with_correct / model_results["steps_tested"], 3 - ) - else: - model_results["steps_with_correct"] = 0 - model_results["step_success_rate"] = 0 - - print(f" Results: {model_results['correct_completions']}/{model_results['total_completions']} correct") - print(f" Accuracy: {model_results['accuracy']:.1%}") - - results["models_tested"].append(model_results) - - # Overall summary - working_models = [m for m in results["models_tested"] if m.get("steps_tested", 0) > 0] - - results["summary"] = { - "steps_requested": num_steps, - "models_tested": len(test_models), - "models_succeeded": len(working_models), - "best_model": max(working_models, key=lambda x: x.get("accuracy", 0))["model"] if working_models else None, - "avg_accuracy": round( - sum(m.get("accuracy", 0) for m in working_models) / len(working_models), 3 - ) if working_models else 0, - "environment_working": bool(working_models), - "output_directory": str(test_output_dir), - } - - return json.dumps(results, indent=2) - - -# ============================================================================ -# Requirements Check -# ============================================================================ - -def check_rl_python_version() -> bool: - """ - Check if Python version meets the minimum for RL tools. - - tinker-atropos depends on the 'tinker' package which requires Python >= 3.11. - """ - return sys.version_info >= (3, 11) - - -def check_rl_api_keys() -> bool: - """ - Check if required API keys and Python version are available. 
- - RL training requires: - - Python >= 3.11 (tinker package requirement) - - TINKER_API_KEY for the Tinker training API - - WANDB_API_KEY for Weights & Biases metrics - """ - if not check_rl_python_version(): - return False - tinker_key = os.getenv("TINKER_API_KEY") - wandb_key = os.getenv("WANDB_API_KEY") - return bool(tinker_key) and bool(wandb_key) - - -def get_missing_keys() -> List[str]: - """ - Get list of missing requirements for RL tools (API keys and Python version). - """ - missing = [] - if not check_rl_python_version(): - missing.append(f"Python >= 3.11 (current: {sys.version_info.major}.{sys.version_info.minor})") - if not os.getenv("TINKER_API_KEY"): - missing.append("TINKER_API_KEY") - if not os.getenv("WANDB_API_KEY"): - missing.append("WANDB_API_KEY") - return missing - - -# --------------------------------------------------------------------------- -# Schemas + Registry -# --------------------------------------------------------------------------- -from tools.registry import registry - -RL_LIST_ENVIRONMENTS_SCHEMA = {"name": "rl_list_environments", "description": "List all available RL environments. Returns environment names, paths, and descriptions. TIP: Read the file_path with file tools to understand how each environment works (verifiers, data loading, rewards).", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_SELECT_ENVIRONMENT_SCHEMA = {"name": "rl_select_environment", "description": "Select an RL environment for training. Loads the environment's default configuration. After selecting, use rl_get_current_config() to see settings and rl_edit_config() to modify them.", "parameters": {"type": "object", "properties": {"name": {"type": "string", "description": "Name of the environment to select (from rl_list_environments)"}}, "required": ["name"]}} -RL_GET_CURRENT_CONFIG_SCHEMA = {"name": "rl_get_current_config", "description": "Get the current environment configuration. 
Returns only fields that can be modified: group_size, max_token_length, total_steps, steps_per_eval, use_wandb, wandb_name, max_num_workers.", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_EDIT_CONFIG_SCHEMA = {"name": "rl_edit_config", "description": "Update a configuration field. Use rl_get_current_config() first to see all available fields for the selected environment. Each environment has different configurable options. Infrastructure settings (tokenizer, URLs, lora_rank, learning_rate) are locked.", "parameters": {"type": "object", "properties": {"field": {"type": "string", "description": "Name of the field to update (get available fields from rl_get_current_config)"}, "value": {"description": "New value for the field"}}, "required": ["field", "value"]}} -RL_START_TRAINING_SCHEMA = {"name": "rl_start_training", "description": "Start a new RL training run with the current environment and config. Most training parameters (lora_rank, learning_rate, etc.) are fixed. Use rl_edit_config() to set group_size, batch_size, wandb_project before starting. WARNING: Training takes hours.", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_CHECK_STATUS_SCHEMA = {"name": "rl_check_status", "description": "Get status and metrics for a training run. RATE LIMITED: enforces 30-minute minimum between checks for the same run. Returns WandB metrics: step, state, reward_mean, loss, percent_correct.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID from rl_start_training()"}}, "required": ["run_id"]}} -RL_STOP_TRAINING_SCHEMA = {"name": "rl_stop_training", "description": "Stop a running training job. 
Use if metrics look bad, training is stagnant, or you want to try different settings.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to stop"}}, "required": ["run_id"]}} -RL_GET_RESULTS_SCHEMA = {"name": "rl_get_results", "description": "Get final results and metrics for a completed training run. Returns final metrics and path to trained weights.", "parameters": {"type": "object", "properties": {"run_id": {"type": "string", "description": "The run ID to get results for"}}, "required": ["run_id"]}} -RL_LIST_RUNS_SCHEMA = {"name": "rl_list_runs", "description": "List all training runs (active and completed) with their status.", "parameters": {"type": "object", "properties": {}, "required": []}} -RL_TEST_INFERENCE_SCHEMA = {"name": "rl_test_inference", "description": "Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inference parsing, and verifier logic. Use BEFORE training to catch issues.", "parameters": {"type": "object", "properties": {"num_steps": {"type": "integer", "description": "Number of steps to run (default: 3, recommended max for testing)", "default": 3}, "group_size": {"type": "integer", "description": "Completions per step (default: 16, like training)", "default": 16}, "models": {"type": "array", "items": {"type": "string"}, "description": "Optional list of OpenRouter model IDs. 
Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.7"}}, "required": []}} - -_rl_env = ["TINKER_API_KEY", "WANDB_API_KEY"] - -registry.register(name="rl_list_environments", emoji="๐Ÿงช", toolset="rl", schema=RL_LIST_ENVIRONMENTS_SCHEMA, - handler=lambda args, **kw: rl_list_environments(), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_select_environment", emoji="๐Ÿงช", toolset="rl", schema=RL_SELECT_ENVIRONMENT_SCHEMA, - handler=lambda args, **kw: rl_select_environment(name=args.get("name", "")), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_get_current_config", emoji="๐Ÿงช", toolset="rl", schema=RL_GET_CURRENT_CONFIG_SCHEMA, - handler=lambda args, **kw: rl_get_current_config(), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_edit_config", emoji="๐Ÿงช", toolset="rl", schema=RL_EDIT_CONFIG_SCHEMA, - handler=lambda args, **kw: rl_edit_config(field=args.get("field", ""), value=args.get("value")), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_start_training", emoji="๐Ÿงช", toolset="rl", schema=RL_START_TRAINING_SCHEMA, - handler=lambda args, **kw: rl_start_training(), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_check_status", emoji="๐Ÿงช", toolset="rl", schema=RL_CHECK_STATUS_SCHEMA, - handler=lambda args, **kw: rl_check_status(run_id=args.get("run_id", "")), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_stop_training", emoji="๐Ÿงช", toolset="rl", schema=RL_STOP_TRAINING_SCHEMA, - handler=lambda args, **kw: rl_stop_training(run_id=args.get("run_id", "")), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_get_results", emoji="๐Ÿงช", toolset="rl", schema=RL_GET_RESULTS_SCHEMA, - handler=lambda args, **kw: 
rl_get_results(run_id=args.get("run_id", "")), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_list_runs", emoji="๐Ÿงช", toolset="rl", schema=RL_LIST_RUNS_SCHEMA, - handler=lambda args, **kw: rl_list_runs(), check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) -registry.register(name="rl_test_inference", emoji="๐Ÿงช", toolset="rl", schema=RL_TEST_INFERENCE_SCHEMA, - handler=lambda args, **kw: rl_test_inference(num_steps=args.get("num_steps", 3), group_size=args.get("group_size", 16), models=args.get("models")), - check_fn=check_rl_api_keys, requires_env=_rl_env, is_async=True) diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index d5b2c0c782cd..b97ecac9b3b0 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -12,6 +12,7 @@ import re import ssl import time +import uuid from email.utils import formatdate from typing import Dict, Optional @@ -30,6 +31,7 @@ _SLACK_TARGET_RE = re.compile(r"^\s*([CGD][A-Z0-9]{8,})\s*$") _WEIXIN_TARGET_RE = re.compile(r"^\s*((?:wxid|gh|v\d+|wm|wb)_[A-Za-z0-9_-]+|[A-Za-z0-9._-]+@chatroom|filehelper)\s*$") _YUANBAO_TARGET_RE = re.compile(r"^\s*((?:group|direct):[^:]+)\s*$") +_TLON_CHANNEL_TARGET_RE = re.compile(r"^\s*((?:chat|heap|diary)/~[^/]+/.+?)(?::([^:]+))?\s*$") # Discord snowflake IDs are numeric, same regex pattern as Telegram topic targets. 
_NUMERIC_TOPIC_RE = _TELEGRAM_TOPIC_TARGET_RE # Platforms that address recipients by phone number and accept E.164 format @@ -317,6 +319,13 @@ def _handle_send(args): def _parse_target_ref(platform_name: str, target_ref: str): """Parse a tool target into chat_id/thread_id and whether it is explicit.""" + if platform_name == "tlon": + target_ref = target_ref.strip() + if target_ref.startswith("~"): + return target_ref, None, True + match = _TLON_CHANNEL_TARGET_RE.fullmatch(target_ref) + if match: + return match.group(1), match.group(2), True if platform_name == "telegram": match = _TELEGRAM_TOPIC_TARGET_RE.fullmatch(target_ref) if match: @@ -730,6 +739,8 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, result = await _send_qqbot(pconfig, chat_id, chunk) elif platform == Platform.YUANBAO: result = await _send_yuanbao(chat_id, chunk) + elif platform == Platform.TLON: + result = await _send_tlon(pconfig.extra, chat_id, chunk, thread_id=thread_id) else: # Plugin platform: route through the gateway's live adapter if # available, otherwise the plugin's standalone_sender_fn. 
@@ -1760,6 +1771,175 @@ async def _send_feishu(pconfig, chat_id, message, media_files=None, thread_id=No return _error(f"Feishu send failed: {e}") +async def _send_tlon(extra, chat_id, message, thread_id=None): + """Send via a Tlon ship HTTP API without requiring a live SSE adapter.""" + try: + import aiohttp + + from gateway.platforms.tlon import ( + _da_from_unix, + _format_ud, + _normalize_ship, + _text_to_story, + ) + + ship_url = (extra or {}).get("ship_url") or os.getenv("TLON_SHIP_URL", "") + ship_name = (extra or {}).get("ship_name") or os.getenv("TLON_SHIP_NAME", "") + ship_code = os.getenv("TLON_SHIP_CODE", "") + if not all([ship_url, ship_name, ship_code]): + return _error("Tlon not configured (TLON_SHIP_URL/NAME/CODE required)") + + ship_url = ship_url.rstrip("/") + ship_name = _normalize_ship(ship_name) + + async with aiohttp.ClientSession() as session: + async with session.post( + f"{ship_url}/~/login", + data={"password": ship_code}, + allow_redirects=False, + timeout=aiohttp.ClientTimeout(total=15), + ) as resp: + if resp.status not in (200, 204, 302, 303, 307): + return _error(f"Tlon auth failed: HTTP {resp.status}") + cookie = resp.headers.get("set-cookie", "") + if not cookie: + for c in session.cookie_jar: + if c.key.startswith("urbauth"): + cookie = f"{c.key}={c.value}" + break + if not cookie: + return _error("Tlon auth failed: no urbauth cookie received") + + channel_id = f"{int(time.time())}-{uuid.uuid4().hex[:8]}" + channel_url = f"{ship_url}/~/channel/{channel_id}" + sent_at = int(time.time() * 1000) + story = _text_to_story(message) + bare_ship = ship_name.lstrip("~") + author = ship_name + writ_id = f"{author}/{_da_from_unix(sent_at)}" + essay = { + "content": story, + "author": author, + "sent": sent_at, + "kind": "/chat", + "meta": None, + "blob": None, + } + if thread_id: + bare_thread_id = str(thread_id).replace(".", "") + if bare_thread_id.isdigit(): + thread_id = _format_ud(int(bare_thread_id)) + + if str(chat_id).startswith("~"): 
+ if thread_id: + dm_diff = { + "id": thread_id, + "delta": { + "reply": { + "id": writ_id, + "meta": None, + "delta": { + "add": { + "memo": { + "content": story, + "author": author, + "sent": sent_at, + }, + "time": None, + } + }, + } + }, + } + else: + dm_diff = { + "id": writ_id, + "delta": {"add": {"essay": essay, "time": None}}, + } + app = "chat" + mark = os.getenv("TLON_DM_ACTION_MARK", "chat-dm-action-1") + poke_json = { + "ship": _normalize_ship(str(chat_id)), + "diff": dm_diff, + } + else: + if thread_id: + post_action = { + "reply": { + "id": thread_id, + "action": { + "add": { + "content": story, + "author": author, + "sent": sent_at, + } + }, + } + } + else: + post_action = { + "add": { + "content": story, + "author": author, + "sent": sent_at, + "kind": ( + "/diary" if str(chat_id).startswith("diary/") + else "/heap" if str(chat_id).startswith("heap/") + else "/chat" + ), + "meta": None, + "blob": None, + } + } + app = "channels" + mark = os.getenv("TLON_CHANNEL_ACTION_MARK", "channel-action-1") + poke_json = { + "channel": { + "nest": chat_id, + "action": {"post": post_action}, + } + } + + headers = {"Content-Type": "application/json", "Cookie": cookie} + async with session.put( + channel_url, + json=[{ + "id": 1, + "action": "poke", + "ship": bare_ship, + "app": app, + "mark": mark, + "json": poke_json, + }], + headers=headers, + timeout=aiohttp.ClientTimeout(total=30), + ) as resp: + if resp.status not in (200, 204): + text = await resp.text() + return _error(f"Tlon send failed: HTTP {resp.status} - {text[:200]}") + + try: + async with session.delete( + channel_url, + headers=headers, + timeout=aiohttp.ClientTimeout(total=5), + ): + pass + except Exception: + pass + + return { + "success": True, + "platform": "tlon", + "chat_id": chat_id, + "message_id": f"{ship_name}/{sent_at}", + } + except ImportError: + return _error("aiohttp not installed. 
Run: pip install aiohttp") + except Exception as e: + return _error(f"Tlon send error: {e}") + + def _check_send_message(): """Gate send_message on gateway running (always available on messaging platforms). diff --git a/tools/skills_guard.py b/tools/skills_guard.py index 363e983da1a9..1610c3225cb6 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -36,7 +36,7 @@ # Hardcoded trust configuration # --------------------------------------------------------------------------- -TRUSTED_REPOS = {"openai/skills", "anthropics/skills"} +TRUSTED_REPOS = {"openai/skills", "anthropics/skills", "huggingface/skills"} INSTALL_POLICY = { # safe caution dangerous diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 3e2c27c338a1..35cec56e08e8 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -329,6 +329,7 @@ class GitHubSource(SkillSource): DEFAULT_TAPS = [ {"repo": "openai/skills", "path": "skills/"}, {"repo": "anthropics/skills", "path": "skills/"}, + {"repo": "huggingface/skills", "path": "skills/"}, {"repo": "VoltAgent/awesome-agent-skills", "path": "skills/"}, {"repo": "garrytan/gstack", "path": ""}, {"repo": "MiniMax-AI/cli", "path": "skill/"}, diff --git a/tools/url_safety.py b/tools/url_safety.py index 743510b2757f..0f3dd597e490 100644 --- a/tools/url_safety.py +++ b/tools/url_safety.py @@ -263,6 +263,9 @@ def is_safe_url(url: str) -> bool: parsed = urlparse(url) hostname = (parsed.hostname or "").strip().lower().rstrip(".") scheme = (parsed.scheme or "").strip().lower() + if scheme not in {"http", "https"}: + logger.warning("Blocked request โ€” unsupported URL scheme: %s", scheme or "") + return False if not hostname: return False diff --git a/tools/web_tools.py b/tools/web_tools.py index e2743248d227..597edb0c8fde 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -586,11 +586,20 @@ async def summarize_chunk(chunk_idx: int, chunk_content: str) -> tuple[int, Opti # Run all chunk summarizations in parallel tasks = [summarize_chunk(i, 
chunk) for i, chunk in enumerate(chunks)] - results = await asyncio.gather(*tasks) - - # Collect successful summaries in order + # Use return_exceptions=True so a single task failure does not discard + # all other successfully summarized chunks. + results = await asyncio.gather(*tasks, return_exceptions=True) + + # Filter out exceptions, then collect successful summaries in order + successful_results = [] + for result_item in results: + if isinstance(result_item, BaseException): + logger.warning("Chunk summarization task failed: %s", result_item) + continue + successful_results.append(result_item) + summaries = [] - for chunk_idx, summary in sorted(results, key=lambda x: x[0]): + for chunk_idx, summary in sorted(successful_results, key=lambda x: x[0]): if summary: summaries.append(f"## Section {chunk_idx + 1}\n{summary}") @@ -1038,10 +1047,16 @@ async def process_single_result(result): # Run all LLM processing in parallel results_list = response.get('results', []) tasks = [process_single_result(result) for result in results_list] - processed_results = await asyncio.gather(*tasks) - + # Use return_exceptions=True so a single task failure does not + # discard all other successfully processed results. 
+ processed_results = await asyncio.gather(*tasks, return_exceptions=True) + # Collect metrics and print results - for result, metrics, status in processed_results: + for result_item in processed_results: + if isinstance(result_item, BaseException): + logger.warning("Web result processing task failed: %s", result_item) + continue + result, metrics, status = result_item url = result.get('url', 'Unknown URL') if status == "processed": debug_call_data["compression_metrics"].append(metrics) @@ -1285,8 +1300,14 @@ async def _process_tavily_crawl(result): return result, metrics, "too_short" tasks = [_process_tavily_crawl(r) for r in response.get('results', [])] - processed_results = await asyncio.gather(*tasks) - for result, metrics, status in processed_results: + # Use return_exceptions=True so a single task failure does not + # discard all other successfully processed crawl results. + processed_results = await asyncio.gather(*tasks, return_exceptions=True) + for result_item in processed_results: + if isinstance(result_item, BaseException): + logger.warning("Tavily crawl processing task failed: %s", result_item) + continue + result, metrics, status = result_item if status == "processed": debug_call_data["compression_metrics"].append(metrics) debug_call_data["pages_processed_with_llm"] += 1 diff --git a/toolsets.py b/toolsets.py index c664136c52a0..d0a00ab90088 100644 --- a/toolsets.py +++ b/toolsets.py @@ -169,18 +169,7 @@ "tools": ["send_message"], "includes": [] }, - - "rl": { - "description": "RL training tools for running reinforcement learning on Tinker-Atropos", - "tools": [ - "rl_list_environments", "rl_select_environment", - "rl_get_current_config", "rl_edit_config", - "rl_start_training", "rl_check_status", - "rl_stop_training", "rl_get_results", - "rl_list_runs", "rl_test_inference" - ], - "includes": [] - }, + "file": { "description": "File manipulation tools: read, write, patch (with fuzzy matching), and search (content + files)", @@ -390,7 +379,7 @@ # 
Mirrors hermes-cli so cron's "default" toolset is the same set of # core tools users see interactively โ€” then `hermes tools` filters # them down per the platform config. _DEFAULT_OFF_TOOLSETS (moa, - # homeassistant, rl) are excluded by _get_platform_tools() unless + # homeassistant) are excluded by _get_platform_tools() unless # the user explicitly enables them. "description": "Default cron toolset - same core tools as hermes-cli; gated by `hermes tools`", "tools": _HERMES_CORE_TOOLS, @@ -515,6 +504,24 @@ "includes": [] }, + "hermes-tlon": { + "description": "Tlon bot toolset - decentralized Urbit messaging (full access)", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-tlon-safe": { + "description": "Tlon bot safe toolset - chat, web, memory, and messaging without local code execution", + "tools": [ + "web_search", "web_extract", + "vision_analyze", "image_generate", + "skills_list", "skill_view", + "todo", "memory", "session_search", "clarify", + "text_to_speech", "send_message", + ], + "includes": [] + }, + "hermes-sms": { "description": "SMS bot toolset - interact with Hermes via SMS (Twilio)", "tools": _HERMES_CORE_TOOLS, @@ -530,7 +537,7 @@ "hermes-gateway": { "description": "Gateway toolset - union of all messaging platform tools", "tools": [], - "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-bluebubbles", "hermes-homeassistant", "hermes-email", "hermes-sms", "hermes-mattermost", "hermes-matrix", "hermes-dingtalk", "hermes-feishu", "hermes-wecom", "hermes-wecom-callback", "hermes-weixin", "hermes-qqbot", "hermes-webhook", "hermes-yuanbao"] + "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-bluebubbles", "hermes-homeassistant", "hermes-email", "hermes-sms", "hermes-mattermost", "hermes-matrix", "hermes-dingtalk", "hermes-feishu", "hermes-wecom", "hermes-wecom-callback", "hermes-weixin", "hermes-qqbot", 
"hermes-webhook", "hermes-yuanbao", "hermes-tlon"] } } diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 230387ce23b0..4a9bc2b65903 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -287,6 +287,9 @@ def _finalize_session(session: dict | None, end_reason: str = "tui_close") -> No if not session or session.get("_finalized"): return session["_finalized"] = True + stop_event = session.get("_notif_stop") + if stop_event is not None: + stop_event.set() agent = session.get("agent") lock = session.get("history_lock") @@ -579,6 +582,7 @@ def _build() -> None: pass _wire_callbacks(sid) + _sessions[sid]["_notif_stop"] = _start_notification_poller(sid, _sessions[sid]) _notify_session_boundary("on_session_reset", key) info = _session_info(agent) @@ -1955,6 +1959,7 @@ def _init_session(sid: str, key: str, agent, history: list, cols: int = 80): # session startup resilient). pass _wire_callbacks(sid) + _sessions[sid]["_notif_stop"] = _start_notification_poller(sid, _sessions[sid]) _notify_session_boundary("on_session_reset", key) _emit("session.info", sid, _session_info(agent)) @@ -3027,6 +3032,105 @@ def run_after_agent_ready() -> None: return _ok(rid, {"status": "streaming"}) +def _notification_poller_loop( + stop_event: threading.Event, sid: str, session: dict +) -> None: + """Poll completion_queue and dispatch notifications autonomously. + + Runs in a daemon thread started by _init_session(). Emits a + status.update (kind=process) for user visibility, then chains an + agent turn via _run_prompt_submit if the session is idle. + + NOTE: The completion_queue is global (one per process). If multiple + TUI sessions coexist, whichever poller wakes first grabs the event, + even if the process was started by a different session. This matches + CLI/gateway behavior (single session per process). 
+ """ + from tools.process_registry import process_registry, format_process_notification + + while not stop_event.is_set() and not session.get("_finalized"): + try: + evt = process_registry.completion_queue.get(timeout=0.5) + except Exception: + continue + + _evt_sid = evt.get("session_id", "") + if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): + continue + + text = format_process_notification(evt) + if not text: + continue + + _emit("status.update", sid, {"kind": "process", "text": text}) + + with session["history_lock"]: + if session.get("running"): + process_registry.completion_queue.put(evt) + continue + session["running"] = True + + rid = f"__notif__{int(time.time() * 1000)}" + try: + _emit("message.start", sid) + _run_prompt_submit(rid, sid, session, text) + except Exception as exc: + print( + f"[tui_gateway] notification poller dispatch failed: " + f"{type(exc).__name__}: {exc}", + file=sys.stderr, + ) + with session["history_lock"]: + session["running"] = False + + # Drain any remaining events after stop signal (process all pending + # before exiting so nothing is lost on shutdown). 
+ while not process_registry.completion_queue.empty(): + try: + evt = process_registry.completion_queue.get_nowait() + except Exception: + break + _evt_sid = evt.get("session_id", "") + if evt.get("type") == "completion" and process_registry.is_completion_consumed(_evt_sid): + continue + text = format_process_notification(evt) + if not text: + continue + + _emit("status.update", sid, {"kind": "process", "text": text}) + + with session["history_lock"]: + if session.get("running"): + process_registry.completion_queue.put(evt) + break + session["running"] = True + + rid = f"__notif__{int(time.time() * 1000)}" + try: + _emit("message.start", sid) + _run_prompt_submit(rid, sid, session, text) + except Exception as exc: + print( + f"[tui_gateway] notification poller dispatch failed: " + f"{type(exc).__name__}: {exc}", + file=sys.stderr, + ) + with session["history_lock"]: + session["running"] = False + + +def _start_notification_poller(sid: str, session: dict) -> threading.Event: + """Start the background notification poller for a TUI session.""" + stop = threading.Event() + t = threading.Thread( + target=_notification_poller_loop, + args=(stop, sid, session), + daemon=True, + ) + t.start() + return stop + + def _run_prompt_submit(rid, sid: str, session: dict, text: Any) -> None: with session["history_lock"]: history = list(session["history"]) @@ -3385,6 +3489,36 @@ def _stream(delta): with session["history_lock"]: session["running"] = False + # Drain completion notifications that arrived during this turn. + # The background poller handles between-turn delivery; this is + # the safety net for events that arrived mid-turn. 
+ try: + from tools.process_registry import process_registry + + for _evt, synth in process_registry.drain_notifications(): + with session["history_lock"]: + if session.get("running"): + process_registry.completion_queue.put(_evt) + break + session["running"] = True + try: + _emit("message.start", sid) + _run_prompt_submit(rid, sid, session, synth) + except Exception as _n_exc: + print( + f"[tui_gateway] completion notification dispatch failed: " + f"{type(_n_exc).__name__}: {_n_exc}", + file=sys.stderr, + ) + with session["history_lock"]: + session["running"] = False + except Exception as _drain_exc: + print( + f"[tui_gateway] completion queue drain failed: " + f"{type(_drain_exc).__name__}: {_drain_exc}", + file=sys.stderr, + ) + threading.Thread(target=run, daemon=True).start() diff --git a/ui-tui/src/__tests__/textInputFastEcho.test.ts b/ui-tui/src/__tests__/textInputFastEcho.test.ts new file mode 100644 index 000000000000..7f246f19f21e --- /dev/null +++ b/ui-tui/src/__tests__/textInputFastEcho.test.ts @@ -0,0 +1,136 @@ +import { describe, expect, it } from 'vitest' + +import { canFastAppendShape, canFastBackspaceShape } from '../components/textInput.js' + +// The fast-echo path bypasses Ink and writes characters directly to stdout +// for the common case of typing plain English at the end of the line. These +// tests pin the shape preconditions that make that bypass safe. +// +// Regression intent: any non-ASCII text โ€” Vietnamese precomposed letters +// (one grapheme, `text.length === 1`, `stringWidth === 1`, but produced +// via IME composition across multiple keystrokes), combining marks +// (zero width), CJK (double width), emoji (variable width), or anything +// that could be produced by an in-flight IME composition โ€” must NOT +// take the bypass. 
Closes: +// - "TUI is experiencing font errors when using Unicode to type Vietnamese" +// - #5221 TUI input box renders incorrectly for CJK / East-Asian wide +// - #7443 CLI TUI renders and deletes Chinese characters incorrectly +// - #17602 / #17603 Chinese text scattering / ghosting + +describe('canFastAppendShape', () => { + const COLS = 40 + + it('accepts plain ASCII appended at end of single-line input', () => { + expect(canFastAppendShape('hello', 5, 'x', COLS, 5)).toBe(true) + expect(canFastAppendShape('hello', 5, ' world', COLS, 5)).toBe(true) + }) + + it('rejects when cursor is not at end of line', () => { + expect(canFastAppendShape('hello', 3, 'x', COLS, 5)).toBe(false) + }) + + it('rejects when current is empty (placeholder render path needed)', () => { + expect(canFastAppendShape('', 0, 'x', COLS, 0)).toBe(false) + }) + + it('rejects when current contains a newline (multi-line layout)', () => { + expect(canFastAppendShape('hi\nthere', 8, 'x', COLS, 5)).toBe(false) + }) + + it('rejects when appending would hit the wrap column', () => { + // Reaching cols on append must trigger a wrap, which the bypass + // cannot draw. Stay strictly below cols. + expect(canFastAppendShape('hello', 5, 'x', 6, 5)).toBe(false) + }) + + // -- Regression coverage: Vietnamese / combining marks / IME -- + + it('rejects Vietnamese precomposed letter แป (U+1EC1) โ€” IME composition path', () => { + // 'แป' is one grapheme, length 1, width 1, but Vietnamese Telex/IME + // produces it via a multi-key composition. Fast-echo would commit the + // intermediate state to stdout and desync once the final commit + // arrives. 
+ expect(canFastAppendShape('hello', 5, 'แป', COLS, 5)).toBe(false) + }) + + it('rejects Vietnamese tone marks ฤƒ, ฦก, ฦฐ (Latin-Extended-A/B)', () => { + for (const ch of ['ฤƒ', 'แบฏ', 'ฦก', 'แป', 'ฦฐ', 'แปฑ']) { + expect(canFastAppendShape('hello', 5, ch, COLS, 5)).toBe(false) + } + }) + + it('rejects NFD combining marks (U+0300 grave, U+0301 acute, U+0302 circumflex)', () => { + // Decomposed Vietnamese: 'e' + combining circumflex + combining grave + // = 'แป'. Each combining mark is zero-width but length 1; without the + // ASCII guard the second/third keypress would be fast-echoed and + // desync the cell column. + expect(canFastAppendShape('hello', 5, '\u0300', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\u0301', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\u0302', COLS, 5)).toBe(false) + }) + + it('rejects CJK (East-Asian wide) characters', () => { + expect(canFastAppendShape('hello', 5, 'ไฝ ', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, 'ๆ—ฅๆœฌ', COLS, 5)).toBe(false) + }) + + it('rejects emoji', () => { + expect(canFastAppendShape('hello', 5, '๐Ÿ™‚', COLS, 5)).toBe(false) + }) + + it('rejects ANSI-bearing or control text', () => { + expect(canFastAppendShape('hello', 5, '\x1b[31m', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\t', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, '\x7f', COLS, 5)).toBe(false) + }) + + it('rejects NBSP and Latin-1 letters that would change the line shape', () => { + expect(canFastAppendShape('hello', 5, '\u00a0', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, 'รฉ', COLS, 5)).toBe(false) + expect(canFastAppendShape('hello', 5, 'รฑ', COLS, 5)).toBe(false) + }) +}) + +describe('canFastBackspaceShape', () => { + it('accepts deleting the last ASCII char', () => { + expect(canFastBackspaceShape('hello', 5)).toBe(true) + }) + + it('rejects when cursor is not at end', () => { + expect(canFastBackspaceShape('hello', 
3)).toBe(false) + }) + + it('rejects when there is nothing to delete', () => { + expect(canFastBackspaceShape('', 0)).toBe(false) + expect(canFastBackspaceShape('hello', 0)).toBe(false) + }) + + it('rejects when value contains a newline', () => { + expect(canFastBackspaceShape('hi\nthere', 8)).toBe(false) + }) + + it('rejects deleting Vietnamese precomposed letter แป', () => { + // The "\b \b" shortcut clears one terminal cell; that's fine for a + // 1-cell ASCII char but if the previous grapheme is a Vietnamese + // letter that the IME may still be holding open, we want Ink to + // re-render so composition state stays consistent. + expect(canFastBackspaceShape('helloแป', 'helloแป'.length)).toBe(false) + }) + + it('rejects deleting a CJK character (2 cells)', () => { + expect(canFastBackspaceShape('hiไฝ ', 'hiไฝ '.length)).toBe(false) + }) + + it('rejects deleting a NFD-composed grapheme with combining marks', () => { + // 'e' + U+0302 (circumflex) + U+0300 (grave) โ€” final grapheme is one + // cluster but the previous-grapheme slice is multi-codepoint. Width + // is 1 but the bypass would be unsafe because the rendered cell + // already contained the combined glyph. + const s = 'hello' + 'e\u0302\u0300' + expect(canFastBackspaceShape(s, s.length)).toBe(false) + }) + + it('rejects deleting an emoji', () => { + expect(canFastBackspaceShape('hi๐Ÿ™‚', 'hi๐Ÿ™‚'.length)).toBe(false) + }) +}) diff --git a/ui-tui/src/components/textInput.tsx b/ui-tui/src/components/textInput.tsx index 0c63ceb93c85..91e109fa366a 100644 --- a/ui-tui/src/components/textInput.tsx +++ b/ui-tui/src/components/textInput.tsx @@ -179,6 +179,84 @@ export function lineNav(s: string, p: number, dir: -1 | 1): null | number { export { offsetFromPosition } +const ASCII_PRINTABLE_RE = /^[\x20-\x7e]+$/ + +/** + * Pure shape-only precondition for the fast-echo append path. 
+ * + * The fast-echo path bypasses Ink's renderer and writes text directly to + * stdout, so the stored value, the rendered terminal cells, and the cursor + * column must all stay in sync without any layout work. We only allow it + * when the inserted text is pure printable ASCII so that: + * + * - `text.length` matches the number of grapheme clusters (no combining + * marks, no surrogate pairs, no precomposed CJK / Latin-Extended + * letters that an IME might still be holding open as a composition), + * - terminal width is exactly 1 cell per character (no East-Asian wide, + * no zero-width, no ambiguous-width fonts), + * - input methods (Vietnamese Telex, IME, dead-keys) cannot leak + * intermediate composition bytes through the bypass before the final + * commit arrives โ€” those always go through the normal Ink render path + * and stay layout-accurate (closes #5221, #7443, #17602/#17603). + * + * We deliberately do NOT just check `stringWidth(text) === text.length`: + * Vietnamese precomposed letters like "แป" (U+1EC1) report width 1 and + * length 1 but are still produced by IME compositions and must not be + * fast-echoed. + */ +export function canFastAppendShape( + current: string, + cursor: number, + text: string, + columns: number, + currentLineWidth: number +): boolean { + if (cursor !== current.length) { + return false + } + + if (current.length === 0) { + return false + } + + if (current.includes('\n')) { + return false + } + + if (!ASCII_PRINTABLE_RE.test(text)) { + return false + } + + return currentLineWidth + text.length < Math.max(1, columns) +} + +/** + * Pure shape-only precondition for the fast-echo backspace path. + * + * Same reasoning as canFastAppendShape โ€” only allow the direct + * "\b \b" stdout shortcut when the deleted grapheme is pure printable + * ASCII. Anything else (combining marks, IME compositions, wide chars, + * tabs, ANSI fragments) goes through the normal render path so Ink can + * recompute cell widths. 
+ */ +export function canFastBackspaceShape(current: string, cursor: number): boolean { + if (cursor !== current.length) { + return false + } + + if (cursor <= 0) { + return false + } + + if (current.includes('\n')) { + return false + } + + const removed = current.slice(prevPos(current, cursor), cursor) + + return ASCII_PRINTABLE_RE.test(removed) +} + function renderWithCursor(value: string, cursor: number) { const pos = Math.max(0, Math.min(cursor, value.length)) @@ -444,26 +522,11 @@ export function TextInput({ const canFastEchoBase = () => focus && termFocus && !selected && !mask && !!stdout?.isTTY - const canFastAppend = (current: string, cursor: number, text: string) => { - const sw = stringWidth(text) - - return ( - canFastEchoBase() && - cursor === current.length && - current.length > 0 && - !current.includes('\n') && - sw === text.length && - lineWidthRef.current + sw < Math.max(1, columns) - ) - } - - const canFastBackspace = (current: string, cursor: number) => { - if (!canFastEchoBase() || cursor !== current.length || cursor <= 0 || current.includes('\n')) { - return false - } + const canFastAppend = (current: string, cursor: number, text: string) => + canFastEchoBase() && canFastAppendShape(current, cursor, text, columns, lineWidthRef.current) - return stringWidth(current.slice(prevPos(current, cursor), cursor)) === 1 - } + const canFastBackspace = (current: string, cursor: number) => + canFastEchoBase() && canFastBackspaceShape(current, cursor) const commit = ( next: string, diff --git a/uv.lock b/uv.lock index a519cc2b1948..2508637a0814 100644 --- a/uv.lock +++ b/uv.lock @@ -301,22 +301,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/9e/c394b4e2104766fb28a1e44e3ed36e4c7773b4d05c868e482be99d5635c9/alibabacloud_tea_util-0.3.14-py3-none-any.whl", hash = "sha256:10d3e5c340d8f7ec69dd27345eb2fc5a1dab07875742525edf07bbe86db93bfe", size = 6697, upload-time = "2025-11-19T06:01:07.355Z" }, ] -[[package]] -name = "altair" -version = "6.0.0" 
-source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2", marker = "python_full_version >= '3.12'" }, - { name = "jsonschema", marker = "python_full_version >= '3.12'" }, - { name = "narwhals", marker = "python_full_version >= '3.12'" }, - { name = "packaging", marker = "python_full_version >= '3.12'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.12' and python_full_version < '3.15'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f7/c0/184a89bd5feba14ff3c41cfaf1dd8a82c05f5ceedbc92145e17042eb08a4/altair-6.0.0.tar.gz", hash = "sha256:614bf5ecbe2337347b590afb111929aa9c16c9527c4887d96c9bc7f6640756b4", size = 763834, upload-time = "2025-11-12T08:59:11.519Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/db/33/ef2f2409450ef6daa61459d5de5c08128e7d3edb773fefd0a324d1310238/altair-6.0.0-py3-none-any.whl", hash = "sha256:09ae95b53d5fe5b16987dccc785a7af8588f2dca50de1e7a156efa8a461515f8", size = 795410, upload-time = "2025-11-12T08:59:09.804Z" }, -] - [[package]] name = "annotated-doc" version = "0.0.4" @@ -354,15 +338,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/63/5f/67db29c6e5d16c8c9c4652d3efb934d89cb750cad201539141781d8eae14/anthropic-0.86.0-py3-none-any.whl", hash = "sha256:9d2bbd339446acce98858c5627d33056efe01f70435b22b63546fe7edae0cd57", size = 469400, upload-time = "2026-03-18T18:43:06.526Z" }, ] -[[package]] -name = "antlr4-python3-runtime" -version = "4.13.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/33/5f/2cdf6f7aca3b20d3f316e9f505292e1f256a32089bd702034c29ebde6242/antlr4_python3_runtime-4.13.2.tar.gz", hash = "sha256:909b647e1d2fc2b70180ac586df3933e38919c85f98ccc656a96cd3f25ef3916", size = 117467, upload-time = "2024-08-03T19:00:12.757Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/89/03/a851e84fcbb85214dc637b6378121ef9a0dd61b4c65264675d8a5c9b1ae7/antlr4_python3_runtime-4.13.2-py3-none-any.whl", hash = "sha256:fe3835eb8d33daece0e799090eda89719dbccee7aa39ef94eed3818cafa5a7e8", size = 144462, upload-time = "2024-08-03T19:00:11.134Z" }, -] - [[package]] name = "anyio" version = "4.12.1" @@ -436,34 +411,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062, upload-time = "2025-11-24T23:26:44.086Z" }, ] -[[package]] -name = "atroposlib" -version = "0.4.0" -source = { git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30#c20c85256e5a45ad31edf8b7276e9c5ee1995a30" } -dependencies = [ - { name = "aiofiles" }, - { name = "aiohttp" }, - { name = "datasets" }, - { name = "fastapi" }, - { name = "gymnasium" }, - { name = "hf-transfer" }, - { name = "jinja2" }, - { name = "jsonlines" }, - { name = "markdown" }, - { name = "math-verify" }, - { name = "nltk" }, - { name = "numpy" }, - { name = "openai" }, - { name = "polars" }, - { name = "pydantic-cli" }, - { name = "rich" }, - { name = "tenacity" }, - { name = "tqdm" }, - { name = "transformers" }, - { name = "uvicorn", extra = ["standard"] }, - { name = "wandb" }, -] - [[package]] name = "attrs" version = "25.4.0" @@ -562,15 +509,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/45/ec96b29162a402fc4c1c5512d114d7b3787b9d1c2ec241d9568b4816ee23/base58-2.1.1-py3-none-any.whl", hash = "sha256:11a36f4d3ce51dfc1043f3218591ac4eb1ceb172919cebe05b52a5bcc8d245c2", size = 5621, upload-time = "2021-10-30T22:12:16.658Z" }, ] -[[package]] -name = "blinker" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460, upload-time = "2024-11-08T17:25:47.436Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, -] - [[package]] name = "boto3" version = "1.42.89" @@ -600,12 +538,28 @@ wheels = [ ] [[package]] -name = "cachetools" -version = "5.5.2" +name = "brotlicffi" +version = "1.2.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8a/b6/017dc5f852ed9b8735af77774509271acbf1de02d238377667145fcee01d/brotlicffi-1.2.0.1.tar.gz", hash = "sha256:c20d5c596278307ad06414a6d95a892377ea274a5c6b790c2548c009385d621c", size = 478156, upload-time = "2026-03-05T19:54:11.547Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f9/dfa56316837fa798eac19358351e974de8e1e2ca9475af4cb90293cd6576/brotlicffi-1.2.0.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c85e65913cf2b79c57a3fdd05b98d9731d9255dc0cb696b09376cc091b9cddd", size = 
433046, upload-time = "2026-03-05T19:53:46.209Z" }, + { url = "https://files.pythonhosted.org/packages/4a/f5/f8f492158c76b0d940388801f04f747028971ad5774287bded5f1e53f08d/brotlicffi-1.2.0.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:535f2d05d0273408abc13fc0eebb467afac17b0ad85090c8913690d40207dac5", size = 1541126, upload-time = "2026-03-05T19:53:48.248Z" }, + { url = "https://files.pythonhosted.org/packages/3b/e1/ff87af10ac419600c63e9287a0649c673673ae6b4f2bcf48e96cb2f89f60/brotlicffi-1.2.0.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce17eb798ca59ecec67a9bb3fd7a4304e120d1cd02953ce522d959b9a84d58ac", size = 1541983, upload-time = "2026-03-05T19:53:50.317Z" }, + { url = "https://files.pythonhosted.org/packages/47/c0/80ecd9bd45776109fab14040e478bf63e456967c9ddee2353d8330ed8de1/brotlicffi-1.2.0.1-cp314-cp314t-win32.whl", hash = "sha256:3c9544f83cb715d95d7eab3af4adbbef8b2093ad6382288a83b3a25feb1a57ec", size = 349047, upload-time = "2026-03-05T19:53:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/ab/98/13e5b250236a281b6cd9e92a01ee1ae231029fa78faee932ef3766e1cb24/brotlicffi-1.2.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:625f8115d32ae9c0740d01ea51518437c3fbaa3e78d41cb18459f6f7ac326000", size = 385652, upload-time = "2026-03-05T19:53:53.892Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9f/b98dcd4af47994cee97aebac866996a006a2e5fc1fd1e2b82a8ad95cf09c/brotlicffi-1.2.0.1-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:91ba5f0ccc040f6ff8f7efaf839f797723d03ed46acb8ae9408f99ffd2572cf4", size = 432608, upload-time = "2026-03-05T19:53:56.736Z" }, + { url = "https://files.pythonhosted.org/packages/b1/7a/ac4ee56595a061e3718a6d1ea7e921f4df156894acffb28ed88a1fd52022/brotlicffi-1.2.0.1-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:be9a670c6811af30a4bd42d7116dc5895d3b41beaa8ed8a89050447a0181f5ce", size = 1534257, upload-time = "2026-03-05T19:53:58.667Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/e7410db7f6f56de57744ea52a115084ceb2735f4d44973f349bb92136586/brotlicffi-1.2.0.1-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f3314a3476f59e5443f9f72a6dff16edc0c3463c9b318feaef04ae3e4683f5a", size = 1536838, upload-time = "2026-03-05T19:54:00.705Z" }, + { url = "https://files.pythonhosted.org/packages/a6/75/6e7977d1935fc3fbb201cbd619be8f2c7aea25d40a096967132854b34708/brotlicffi-1.2.0.1-cp38-abi3-win32.whl", hash = "sha256:82ea52e2b5d3145b6c406ebd3efb0d55db718b7ad996bd70c62cec0439de1187", size = 343337, upload-time = "2026-03-05T19:54:02.446Z" }, + { url = "https://files.pythonhosted.org/packages/d8/ef/e7e485ce5e4ba3843a0a92feb767c7b6098fd6e65ce752918074d175ae71/brotlicffi-1.2.0.1-cp38-abi3-win_amd64.whl", hash = "sha256:da2e82a08e7778b8bc539d27ca03cdd684113e81394bfaaad8d0dfc6a17ddede", size = 379026, upload-time = "2026-03-05T19:54:04.322Z" }, + { url = "https://files.pythonhosted.org/packages/7f/53/6262c2256513e6f530d81642477cb19367270922063eaa2d7b781d8c723d/brotlicffi-1.2.0.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:e015af99584c6db1490a69a210c765953e473e63adc2d891ac3062a737c9e851", size = 402265, upload-time = "2026-03-05T19:54:05.858Z" }, + { url = "https://files.pythonhosted.org/packages/1f/d9/d5340b43cf5fbe7fe5a083d237e5338cc1caa73bea523be1c5e452c26290/brotlicffi-1.2.0.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:37cb587d32bf7168e2218c455e22e409ad1f3157c6c71945879a311f3e6b6abf", size = 406710, upload-time = "2026-03-05T19:54:07.272Z" }, + { url = 
"https://files.pythonhosted.org/packages/a3/82/dbced4c1e0792efdf23fd90ff6d2a320c64ff4dfef7aacc85c04fde9ddd2/brotlicffi-1.2.0.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d6ba65dd528892b4d9960beba2ae011a753620bcfc66cf6fa3cee18d7b0baa4", size = 402787, upload-time = "2026-03-05T19:54:08.73Z" }, + { url = "https://files.pythonhosted.org/packages/ef/6f/534205ba7590c9a8716a614f270c5c2ec419b5b7079b3f9cd31b7b5580de/brotlicffi-1.2.0.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:f2a5575653b0672638ba039b82fda56854934d7a6a24d4b8b5033f73ab43cbc1", size = 375108, upload-time = "2026-03-05T19:54:10.079Z" }, ] [[package]] @@ -809,15 +763,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, ] -[[package]] -name = "cloudpickle" -version = "3.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/27/fb/576f067976d320f5f0114a8d9fa1215425441bb35627b1993e5afd8111e5/cloudpickle-3.1.2.tar.gz", hash = "sha256:7fda9eb655c9c230dab534f1983763de5835249750e85fbcef43aaa30a9a2414", size = 22330, upload-time = "2025-11-03T09:25:26.604Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/39/799be3f2f0f38cc727ee3b4f1445fe6d5e4133064ec2e4115069418a5bb6/cloudpickle-3.1.2-py3-none-any.whl", hash = "sha256:9acb47f6afd73f60dc1df93bb801b472f05ff42fa6c84167d25cb206be1fbf4a", size = 22228, upload-time = "2025-11-03T09:25:25.534Z" }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -827,88 +772,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = 
"sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, ] -[[package]] -name = "contourpy" -version = "1.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/2e/c4390a31919d8a78b90e8ecf87cd4b4c4f05a5b48d05ec17db8e5404c6f4/contourpy-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:709a48ef9a690e1343202916450bc48b9e51c049b089c7f79a267b46cffcdaa1", size = 288773, upload-time = "2025-07-26T12:01:02.277Z" }, - { url = "https://files.pythonhosted.org/packages/0d/44/c4b0b6095fef4dc9c420e041799591e3b63e9619e3044f7f4f6c21c0ab24/contourpy-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:23416f38bfd74d5d28ab8429cc4d63fa67d5068bd711a85edb1c3fb0c3e2f381", size = 270149, upload-time = "2025-07-26T12:01:04.072Z" }, - { url = "https://files.pythonhosted.org/packages/30/2e/dd4ced42fefac8470661d7cb7e264808425e6c5d56d175291e93890cce09/contourpy-1.3.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:929ddf8c4c7f348e4c0a5a3a714b5c8542ffaa8c22954862a46ca1813b667ee7", size = 329222, upload-time = "2025-07-26T12:01:05.688Z" }, - { url = "https://files.pythonhosted.org/packages/f2/74/cc6ec2548e3d276c71389ea4802a774b7aa3558223b7bade3f25787fafc2/contourpy-1.3.3-cp311-cp311-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9e999574eddae35f1312c2b4b717b7885d4edd6cb46700e04f7f02db454e67c1", size = 377234, upload-time = "2025-07-26T12:01:07.054Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/b3/64ef723029f917410f75c09da54254c5f9ea90ef89b143ccadb09df14c15/contourpy-1.3.3-cp311-cp311-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0bf67e0e3f482cb69779dd3061b534eb35ac9b17f163d851e2a547d56dba0a3a", size = 380555, upload-time = "2025-07-26T12:01:08.801Z" }, - { url = "https://files.pythonhosted.org/packages/5f/4b/6157f24ca425b89fe2eb7e7be642375711ab671135be21e6faa100f7448c/contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:51e79c1f7470158e838808d4a996fa9bac72c498e93d8ebe5119bc1e6becb0db", size = 355238, upload-time = "2025-07-26T12:01:10.319Z" }, - { url = "https://files.pythonhosted.org/packages/98/56/f914f0dd678480708a04cfd2206e7c382533249bc5001eb9f58aa693e200/contourpy-1.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:598c3aaece21c503615fd59c92a3598b428b2f01bfb4b8ca9c4edeecc2438620", size = 1326218, upload-time = "2025-07-26T12:01:12.659Z" }, - { url = "https://files.pythonhosted.org/packages/fb/d7/4a972334a0c971acd5172389671113ae82aa7527073980c38d5868ff1161/contourpy-1.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:322ab1c99b008dad206d406bb61d014cf0174df491ae9d9d0fac6a6fda4f977f", size = 1392867, upload-time = "2025-07-26T12:01:15.533Z" }, - { url = "https://files.pythonhosted.org/packages/75/3e/f2cc6cd56dc8cff46b1a56232eabc6feea52720083ea71ab15523daab796/contourpy-1.3.3-cp311-cp311-win32.whl", hash = "sha256:fd907ae12cd483cd83e414b12941c632a969171bf90fc937d0c9f268a31cafff", size = 183677, upload-time = "2025-07-26T12:01:17.088Z" }, - { url = "https://files.pythonhosted.org/packages/98/4b/9bd370b004b5c9d8045c6c33cf65bae018b27aca550a3f657cdc99acdbd8/contourpy-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3519428f6be58431c56581f1694ba8e50626f2dd550af225f82fb5f5814d2a42", size = 225234, upload-time = "2025-07-26T12:01:18.256Z" }, - { url = 
"https://files.pythonhosted.org/packages/d9/b6/71771e02c2e004450c12b1120a5f488cad2e4d5b590b1af8bad060360fe4/contourpy-1.3.3-cp311-cp311-win_arm64.whl", hash = "sha256:15ff10bfada4bf92ec8b31c62bf7c1834c244019b4a33095a68000d7075df470", size = 193123, upload-time = "2025-07-26T12:01:19.848Z" }, - { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" }, - { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" }, - { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" }, - { url = "https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" }, - { url = "https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" }, - { url = 
"https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" }, - { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" }, - { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" }, - { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" }, - { url = "https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" }, - { url = "https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/35/0167aad910bbdb9599272bd96d01a9ec6852f36b9455cf2ca67bd4cc2d23/contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5", size = 293257, upload-time = "2025-07-26T12:01:39.367Z" }, - { url = "https://files.pythonhosted.org/packages/96/e4/7adcd9c8362745b2210728f209bfbcf7d91ba868a2c5f40d8b58f54c509b/contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1", size = 274034, upload-time = "2025-07-26T12:01:40.645Z" }, - { url = "https://files.pythonhosted.org/packages/73/23/90e31ceeed1de63058a02cb04b12f2de4b40e3bef5e082a7c18d9c8ae281/contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286", size = 334672, upload-time = "2025-07-26T12:01:41.942Z" }, - { url = "https://files.pythonhosted.org/packages/ed/93/b43d8acbe67392e659e1d984700e79eb67e2acb2bd7f62012b583a7f1b55/contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5", size = 381234, upload-time = "2025-07-26T12:01:43.499Z" }, - { url = "https://files.pythonhosted.org/packages/46/3b/bec82a3ea06f66711520f75a40c8fc0b113b2a75edb36aa633eb11c4f50f/contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67", size = 385169, upload-time = "2025-07-26T12:01:45.219Z" }, - { url = "https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9", size = 362859, upload-time = "2025-07-26T12:01:46.519Z" }, - { url = 
"https://files.pythonhosted.org/packages/33/71/e2a7945b7de4e58af42d708a219f3b2f4cff7386e6b6ab0a0fa0033c49a9/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659", size = 1332062, upload-time = "2025-07-26T12:01:48.964Z" }, - { url = "https://files.pythonhosted.org/packages/12/fc/4e87ac754220ccc0e807284f88e943d6d43b43843614f0a8afa469801db0/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7", size = 1403932, upload-time = "2025-07-26T12:01:51.979Z" }, - { url = "https://files.pythonhosted.org/packages/a6/2e/adc197a37443f934594112222ac1aa7dc9a98faf9c3842884df9a9d8751d/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d", size = 185024, upload-time = "2025-07-26T12:01:53.245Z" }, - { url = "https://files.pythonhosted.org/packages/18/0b/0098c214843213759692cc638fce7de5c289200a830e5035d1791d7a2338/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263", size = 226578, upload-time = "2025-07-26T12:01:54.422Z" }, - { url = "https://files.pythonhosted.org/packages/8a/9a/2f6024a0c5995243cd63afdeb3651c984f0d2bc727fd98066d40e141ad73/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9", size = 193524, upload-time = "2025-07-26T12:01:55.73Z" }, - { url = "https://files.pythonhosted.org/packages/c0/b3/f8a1a86bd3298513f500e5b1f5fd92b69896449f6cab6a146a5d52715479/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d", size = 306730, upload-time = "2025-07-26T12:01:57.051Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/11/4780db94ae62fc0c2053909b65dc3246bd7cecfc4f8a20d957ad43aa4ad8/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216", size = 287897, upload-time = "2025-07-26T12:01:58.663Z" }, - { url = "https://files.pythonhosted.org/packages/ae/15/e59f5f3ffdd6f3d4daa3e47114c53daabcb18574a26c21f03dc9e4e42ff0/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae", size = 326751, upload-time = "2025-07-26T12:02:00.343Z" }, - { url = "https://files.pythonhosted.org/packages/0f/81/03b45cfad088e4770b1dcf72ea78d3802d04200009fb364d18a493857210/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20", size = 375486, upload-time = "2025-07-26T12:02:02.128Z" }, - { url = "https://files.pythonhosted.org/packages/0c/ba/49923366492ffbdd4486e970d421b289a670ae8cf539c1ea9a09822b371a/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99", size = 388106, upload-time = "2025-07-26T12:02:03.615Z" }, - { url = "https://files.pythonhosted.org/packages/9f/52/5b00ea89525f8f143651f9f03a0df371d3cbd2fccd21ca9b768c7a6500c2/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b", size = 352548, upload-time = "2025-07-26T12:02:05.165Z" }, - { url = "https://files.pythonhosted.org/packages/32/1d/a209ec1a3a3452d490f6b14dd92e72280c99ae3d1e73da74f8277d4ee08f/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a", size = 1322297, upload-time = "2025-07-26T12:02:07.379Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/9e/46f0e8ebdd884ca0e8877e46a3f4e633f6c9c8c4f3f6e72be3fe075994aa/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e", size = 1391023, upload-time = "2025-07-26T12:02:10.171Z" }, - { url = "https://files.pythonhosted.org/packages/b9/70/f308384a3ae9cd2209e0849f33c913f658d3326900d0ff5d378d6a1422d2/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3", size = 196157, upload-time = "2025-07-26T12:02:11.488Z" }, - { url = "https://files.pythonhosted.org/packages/b2/dd/880f890a6663b84d9e34a6f88cded89d78f0091e0045a284427cb6b18521/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8", size = 240570, upload-time = "2025-07-26T12:02:12.754Z" }, - { url = "https://files.pythonhosted.org/packages/80/99/2adc7d8ffead633234817ef8e9a87115c8a11927a94478f6bb3d3f4d4f7d/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301", size = 199713, upload-time = "2025-07-26T12:02:14.4Z" }, - { url = "https://files.pythonhosted.org/packages/72/8b/4546f3ab60f78c514ffb7d01a0bd743f90de36f0019d1be84d0a708a580a/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a", size = 292189, upload-time = "2025-07-26T12:02:16.095Z" }, - { url = "https://files.pythonhosted.org/packages/fd/e1/3542a9cb596cadd76fcef413f19c79216e002623158befe6daa03dbfa88c/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77", size = 273251, upload-time = "2025-07-26T12:02:17.524Z" }, - { url = 
"https://files.pythonhosted.org/packages/b1/71/f93e1e9471d189f79d0ce2497007731c1e6bf9ef6d1d61b911430c3db4e5/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5", size = 335810, upload-time = "2025-07-26T12:02:18.9Z" }, - { url = "https://files.pythonhosted.org/packages/91/f9/e35f4c1c93f9275d4e38681a80506b5510e9327350c51f8d4a5a724d178c/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4", size = 382871, upload-time = "2025-07-26T12:02:20.418Z" }, - { url = "https://files.pythonhosted.org/packages/b5/71/47b512f936f66a0a900d81c396a7e60d73419868fba959c61efed7a8ab46/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36", size = 386264, upload-time = "2025-07-26T12:02:21.916Z" }, - { url = "https://files.pythonhosted.org/packages/04/5f/9ff93450ba96b09c7c2b3f81c94de31c89f92292f1380261bd7195bea4ea/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3", size = 363819, upload-time = "2025-07-26T12:02:23.759Z" }, - { url = "https://files.pythonhosted.org/packages/3e/a6/0b185d4cc480ee494945cde102cb0149ae830b5fa17bf855b95f2e70ad13/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b", size = 1333650, upload-time = "2025-07-26T12:02:26.181Z" }, - { url = "https://files.pythonhosted.org/packages/43/d7/afdc95580ca56f30fbcd3060250f66cedbde69b4547028863abd8aa3b47e/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36", size = 1404833, upload-time = "2025-07-26T12:02:28.782Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/e2/366af18a6d386f41132a48f033cbd2102e9b0cf6345d35ff0826cd984566/contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d", size = 189692, upload-time = "2025-07-26T12:02:30.128Z" }, - { url = "https://files.pythonhosted.org/packages/7d/c2/57f54b03d0f22d4044b8afb9ca0e184f8b1afd57b4f735c2fa70883dc601/contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd", size = 232424, upload-time = "2025-07-26T12:02:31.395Z" }, - { url = "https://files.pythonhosted.org/packages/18/79/a9416650df9b525737ab521aa181ccc42d56016d2123ddcb7b58e926a42c/contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339", size = 198300, upload-time = "2025-07-26T12:02:32.956Z" }, - { url = "https://files.pythonhosted.org/packages/1f/42/38c159a7d0f2b7b9c04c64ab317042bb6952b713ba875c1681529a2932fe/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772", size = 306769, upload-time = "2025-07-26T12:02:34.2Z" }, - { url = "https://files.pythonhosted.org/packages/c3/6c/26a8205f24bca10974e77460de68d3d7c63e282e23782f1239f226fcae6f/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77", size = 287892, upload-time = "2025-07-26T12:02:35.807Z" }, - { url = "https://files.pythonhosted.org/packages/66/06/8a475c8ab718ebfd7925661747dbb3c3ee9c82ac834ccb3570be49d129f4/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13", size = 326748, upload-time = "2025-07-26T12:02:37.193Z" }, - { url = 
"https://files.pythonhosted.org/packages/b4/a3/c5ca9f010a44c223f098fccd8b158bb1cb287378a31ac141f04730dc49be/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe", size = 375554, upload-time = "2025-07-26T12:02:38.894Z" }, - { url = "https://files.pythonhosted.org/packages/80/5b/68bd33ae63fac658a4145088c1e894405e07584a316738710b636c6d0333/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f", size = 388118, upload-time = "2025-07-26T12:02:40.642Z" }, - { url = "https://files.pythonhosted.org/packages/40/52/4c285a6435940ae25d7410a6c36bda5145839bc3f0beb20c707cda18b9d2/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0", size = 352555, upload-time = "2025-07-26T12:02:42.25Z" }, - { url = "https://files.pythonhosted.org/packages/24/ee/3e81e1dd174f5c7fefe50e85d0892de05ca4e26ef1c9a59c2a57e43b865a/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4", size = 1322295, upload-time = "2025-07-26T12:02:44.668Z" }, - { url = "https://files.pythonhosted.org/packages/3c/b2/6d913d4d04e14379de429057cd169e5e00f6c2af3bb13e1710bcbdb5da12/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f", size = 1391027, upload-time = "2025-07-26T12:02:47.09Z" }, - { url = "https://files.pythonhosted.org/packages/93/8a/68a4ec5c55a2971213d29a9374913f7e9f18581945a7a31d1a39b5d2dfe5/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae", size = 202428, upload-time = "2025-07-26T12:02:48.691Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/96/fd9f641ffedc4fa3ace923af73b9d07e869496c9cc7a459103e6e978992f/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc", size = 250331, upload-time = "2025-07-26T12:02:50.137Z" }, - { url = "https://files.pythonhosted.org/packages/ae/8c/469afb6465b853afff216f9528ffda78a915ff880ed58813ba4faf4ba0b6/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b", size = 203831, upload-time = "2025-07-26T12:02:51.449Z" }, - { url = "https://files.pythonhosted.org/packages/a5/29/8dcfe16f0107943fa92388c23f6e05cff0ba58058c4c95b00280d4c75a14/contourpy-1.3.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:cd5dfcaeb10f7b7f9dc8941717c6c2ade08f587be2226222c12b25f0483ed497", size = 278809, upload-time = "2025-07-26T12:02:52.74Z" }, - { url = "https://files.pythonhosted.org/packages/85/a9/8b37ef4f7dafeb335daee3c8254645ef5725be4d9c6aa70b50ec46ef2f7e/contourpy-1.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:0c1fc238306b35f246d61a1d416a627348b5cf0648648a031e14bb8705fcdfe8", size = 261593, upload-time = "2025-07-26T12:02:54.037Z" }, - { url = "https://files.pythonhosted.org/packages/0a/59/ebfb8c677c75605cc27f7122c90313fd2f375ff3c8d19a1694bda74aaa63/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70f9aad7de812d6541d29d2bbf8feb22ff7e1c299523db288004e3157ff4674e", size = 302202, upload-time = "2025-07-26T12:02:55.947Z" }, - { url = "https://files.pythonhosted.org/packages/3c/37/21972a15834d90bfbfb009b9d004779bd5a07a0ec0234e5ba8f64d5736f4/contourpy-1.3.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ed3657edf08512fc3fe81b510e35c2012fbd3081d2e26160f27ca28affec989", size = 329207, upload-time = "2025-07-26T12:02:57.468Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/58/bd257695f39d05594ca4ad60df5bcb7e32247f9951fd09a9b8edb82d1daa/contourpy-1.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:3d1a3799d62d45c18bafd41c5fa05120b96a28079f2393af559b843d1a966a77", size = 225315, upload-time = "2025-07-26T12:02:58.801Z" }, -] - [[package]] name = "croniter" version = "6.0.0" @@ -1018,15 +881,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/5c/9fa0ad6462b62efd0fb5ac1100eee47bc96ecc198ff4e237c731e5473616/ctranslate2-4.7.1-cp314-cp314t-win_amd64.whl", hash = "sha256:dfb7657bdb7b8211c8f9ecb6f3b70bc0db0e0384d01a8b1808cb66fe7199df59", size = 19123451, upload-time = "2026-02-04T06:12:24.115Z" }, ] -[[package]] -name = "cycler" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, -] - [[package]] name = "darabonba-core" version = "1.0.5" @@ -1040,31 +894,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/66/d3/a7daaee544c904548e665829b51a9fa2572acb82c73ad787a8ff90273002/darabonba_core-1.0.5-py3-none-any.whl", hash = "sha256:671ab8dbc4edc2a8f88013da71646839bb8914f1259efc069353243ef52ea27c", size = 24580, upload-time = "2025-12-12T07:53:59.494Z" }, ] -[[package]] -name = "datasets" -version = "4.8.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dill" }, - { name = "filelock" }, - { name = "fsspec", extra = ["http"] }, - { name = "httpx" 
}, - { name = "huggingface-hub" }, - { name = "multiprocess" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "pyarrow" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "tqdm" }, - { name = "xxhash" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/22/22/73e46ac7a8c25e7ef0b3bd6f10da3465021d90219a32eb0b4d2afea4c56e/datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52", size = 604382, upload-time = "2026-03-23T14:21:17.987Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/e5/247d094108e42ac26363ab8dc57f168840cf7c05774b40ffeb0d78868fcc/datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d", size = 526991, upload-time = "2026-03-23T14:21:15.89Z" }, -] - [[package]] name = "davey" version = "0.1.4" @@ -1290,15 +1119,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/d0/205d54408c08b13550c733c4b85429e7ead111c7f0014309637425520a9a/deprecated-1.3.1-py2.py3-none-any.whl", hash = "sha256:597bfef186b6f60181535a29fbe44865ce137a5079f295b479886c82729d5f3f", size = 11298, upload-time = "2025-10-30T08:19:00.758Z" }, ] -[[package]] -name = "dill" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/81/e1/56027a71e31b02ddc53c7d65b01e68edf64dea2932122fe7746a516f75d5/dill-0.4.1.tar.gz", hash = "sha256:423092df4182177d4d8ba8290c8a5b640c66ab35ec7da59ccfa00f6fa3eea5fa", size = 187315, upload-time = "2026-01-19T02:36:56.85Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/77/dc8c558f7593132cf8fefec57c4f60c83b16941c574ac5f619abb3ae7933/dill-0.4.1-py3-none-any.whl", hash = "sha256:1e1ce33e978ae97fcfcff5638477032b801c46c7c65cf717f95fbc2248f79a9d", size = 120019, upload-time = "2026-01-19T02:36:55.663Z" }, -] - [[package]] name = "dingtalk-stream" version = "0.24.3" @@ -1436,15 +1256,6 @@ 
wheels = [ { url = "https://files.pythonhosted.org/packages/6a/48/265c2935467ac1dbcb7c5b54cd8a2f579cbb263db6bfc0e0c8fe4bc79c02/fal_client-0.13.1-py3-none-any.whl", hash = "sha256:967a01f3a4112d485a30f8f3a0e678c6ff5b919eb9c5d480315cfc30a79fc037", size = 19265, upload-time = "2026-02-20T07:21:28.143Z" }, ] -[[package]] -name = "farama-notifications" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2e/2c/8384832b7a6b1fd6ba95bbdcae26e7137bb3eedc955c42fd5cdcc086cfbf/Farama-Notifications-0.0.4.tar.gz", hash = "sha256:13fceff2d14314cf80703c8266462ebf3733c7d165336eee998fc58e545efd18", size = 2131, upload-time = "2023-02-27T18:28:41.047Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/2c/ffc08c54c05cdce6fbed2aeebc46348dbe180c6d2c541c7af7ba0aa5f5f8/Farama_Notifications-0.0.4-py3-none-any.whl", hash = "sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae", size = 2511, upload-time = "2023-02-27T18:28:39.447Z" }, -] - [[package]] name = "fastapi" version = "0.133.1" @@ -1477,58 +1288,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/99/49ee85903dee060d9f08297b4a342e5e0bcfca2f027a07b4ee0a38ab13f9/faster_whisper-1.2.1-py3-none-any.whl", hash = "sha256:79a66ad50688c0b794dd501dc340a736992a6342f7f95e5811be60b5224a26a7", size = 1118909, upload-time = "2025-10-31T11:35:47.794Z" }, ] -[[package]] -name = "fastuuid" -version = "0.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c3/7d/d9daedf0f2ebcacd20d599928f8913e9d2aea1d56d2d355a93bfa2b611d7/fastuuid-0.14.0.tar.gz", hash = "sha256:178947fc2f995b38497a74172adee64fdeb8b7ec18f2a5934d037641ba265d26", size = 18232, upload-time = "2025-10-19T22:19:22.402Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/98/f3/12481bda4e5b6d3e698fbf525df4443cc7dce746f246b86b6fcb2fba1844/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:73946cb950c8caf65127d4e9a325e2b6be0442a224fd51ba3b6ac44e1912ce34", size = 516386, upload-time = "2025-10-19T22:42:40.176Z" }, - { url = "https://files.pythonhosted.org/packages/59/19/2fc58a1446e4d72b655648eb0879b04e88ed6fa70d474efcf550f640f6ec/fastuuid-0.14.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:12ac85024637586a5b69645e7ed986f7535106ed3013640a393a03e461740cb7", size = 264569, upload-time = "2025-10-19T22:25:50.977Z" }, - { url = "https://files.pythonhosted.org/packages/78/29/3c74756e5b02c40cfcc8b1d8b5bac4edbd532b55917a6bcc9113550e99d1/fastuuid-0.14.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:05a8dde1f395e0c9b4be515b7a521403d1e8349443e7641761af07c7ad1624b1", size = 254366, upload-time = "2025-10-19T22:29:49.166Z" }, - { url = "https://files.pythonhosted.org/packages/52/96/d761da3fccfa84f0f353ce6e3eb8b7f76b3aa21fd25e1b00a19f9c80a063/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09378a05020e3e4883dfdab438926f31fea15fd17604908f3d39cbeb22a0b4dc", size = 278978, upload-time = "2025-10-19T22:35:41.306Z" }, - { url = "https://files.pythonhosted.org/packages/fc/c2/f84c90167cc7765cb82b3ff7808057608b21c14a38531845d933a4637307/fastuuid-0.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbb0c4b15d66b435d2538f3827f05e44e2baafcc003dd7d8472dc67807ab8fd8", size = 279692, upload-time = "2025-10-19T22:25:36.997Z" }, - { url = "https://files.pythonhosted.org/packages/af/7b/4bacd03897b88c12348e7bd77943bac32ccf80ff98100598fcff74f75f2e/fastuuid-0.14.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cd5a7f648d4365b41dbf0e38fe8da4884e57bed4e77c83598e076ac0c93995e7", size = 303384, upload-time = "2025-10-19T22:29:46.578Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/a2/584f2c29641df8bd810d00c1f21d408c12e9ad0c0dafdb8b7b29e5ddf787/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c0a94245afae4d7af8c43b3159d5e3934c53f47140be0be624b96acd672ceb73", size = 460921, upload-time = "2025-10-19T22:36:42.006Z" }, - { url = "https://files.pythonhosted.org/packages/24/68/c6b77443bb7764c760e211002c8638c0c7cce11cb584927e723215ba1398/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b29e23c97e77c3a9514d70ce343571e469098ac7f5a269320a0f0b3e193ab36", size = 480575, upload-time = "2025-10-19T22:28:18.975Z" }, - { url = "https://files.pythonhosted.org/packages/5a/87/93f553111b33f9bb83145be12868c3c475bf8ea87c107063d01377cc0e8e/fastuuid-0.14.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1e690d48f923c253f28151b3a6b4e335f2b06bf669c68a02665bc150b7839e94", size = 452317, upload-time = "2025-10-19T22:25:32.75Z" }, - { url = "https://files.pythonhosted.org/packages/9e/8c/a04d486ca55b5abb7eaa65b39df8d891b7b1635b22db2163734dc273579a/fastuuid-0.14.0-cp311-cp311-win32.whl", hash = "sha256:a6f46790d59ab38c6aa0e35c681c0484b50dc0acf9e2679c005d61e019313c24", size = 154804, upload-time = "2025-10-19T22:24:15.615Z" }, - { url = "https://files.pythonhosted.org/packages/9c/b2/2d40bf00820de94b9280366a122cbaa60090c8cf59e89ac3938cf5d75895/fastuuid-0.14.0-cp311-cp311-win_amd64.whl", hash = "sha256:e150eab56c95dc9e3fefc234a0eedb342fac433dacc273cd4d150a5b0871e1fa", size = 156099, upload-time = "2025-10-19T22:24:31.646Z" }, - { url = "https://files.pythonhosted.org/packages/02/a2/e78fcc5df65467f0d207661b7ef86c5b7ac62eea337c0c0fcedbeee6fb13/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77e94728324b63660ebf8adb27055e92d2e4611645bf12ed9d88d30486471d0a", size = 510164, upload-time = "2025-10-19T22:31:45.635Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/b3/c846f933f22f581f558ee63f81f29fa924acd971ce903dab1a9b6701816e/fastuuid-0.14.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:caa1f14d2102cb8d353096bc6ef6c13b2c81f347e6ab9d6fbd48b9dea41c153d", size = 261837, upload-time = "2025-10-19T22:38:38.53Z" }, - { url = "https://files.pythonhosted.org/packages/54/ea/682551030f8c4fa9a769d9825570ad28c0c71e30cf34020b85c1f7ee7382/fastuuid-0.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d23ef06f9e67163be38cece704170486715b177f6baae338110983f99a72c070", size = 251370, upload-time = "2025-10-19T22:40:26.07Z" }, - { url = "https://files.pythonhosted.org/packages/14/dd/5927f0a523d8e6a76b70968e6004966ee7df30322f5fc9b6cdfb0276646a/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c9ec605ace243b6dbe3bd27ebdd5d33b00d8d1d3f580b39fdd15cd96fd71796", size = 277766, upload-time = "2025-10-19T22:37:23.779Z" }, - { url = "https://files.pythonhosted.org/packages/16/6e/c0fb547eef61293153348f12e0f75a06abb322664b34a1573a7760501336/fastuuid-0.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:808527f2407f58a76c916d6aa15d58692a4a019fdf8d4c32ac7ff303b7d7af09", size = 278105, upload-time = "2025-10-19T22:26:56.821Z" }, - { url = "https://files.pythonhosted.org/packages/2d/b1/b9c75e03b768f61cf2e84ee193dc18601aeaf89a4684b20f2f0e9f52b62c/fastuuid-0.14.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fb3c0d7fef6674bbeacdd6dbd386924a7b60b26de849266d1ff6602937675c8", size = 301564, upload-time = "2025-10-19T22:30:31.604Z" }, - { url = "https://files.pythonhosted.org/packages/fc/fa/f7395fdac07c7a54f18f801744573707321ca0cee082e638e36452355a9d/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab3f5d36e4393e628a4df337c2c039069344db5f4b9d2a3c9cea48284f1dd741", size = 459659, upload-time = "2025-10-19T22:31:32.341Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/49/c9fd06a4a0b1f0f048aacb6599e7d96e5d6bc6fa680ed0d46bf111929d1b/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:b9a0ca4f03b7e0b01425281ffd44e99d360e15c895f1907ca105854ed85e2057", size = 478430, upload-time = "2025-10-19T22:26:22.962Z" }, - { url = "https://files.pythonhosted.org/packages/be/9c/909e8c95b494e8e140e8be6165d5fc3f61fdc46198c1554df7b3e1764471/fastuuid-0.14.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3acdf655684cc09e60fb7e4cf524e8f42ea760031945aa8086c7eae2eeeabeb8", size = 450894, upload-time = "2025-10-19T22:27:01.647Z" }, - { url = "https://files.pythonhosted.org/packages/90/eb/d29d17521976e673c55ef7f210d4cdd72091a9ec6755d0fd4710d9b3c871/fastuuid-0.14.0-cp312-cp312-win32.whl", hash = "sha256:9579618be6280700ae36ac42c3efd157049fe4dd40ca49b021280481c78c3176", size = 154374, upload-time = "2025-10-19T22:29:19.879Z" }, - { url = "https://files.pythonhosted.org/packages/cc/fc/f5c799a6ea6d877faec0472d0b27c079b47c86b1cdc577720a5386483b36/fastuuid-0.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:d9e4332dc4ba054434a9594cbfaf7823b57993d7d8e7267831c3e059857cf397", size = 156550, upload-time = "2025-10-19T22:27:49.658Z" }, - { url = "https://files.pythonhosted.org/packages/a5/83/ae12dd39b9a39b55d7f90abb8971f1a5f3c321fd72d5aa83f90dc67fe9ed/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:77a09cb7427e7af74c594e409f7731a0cf887221de2f698e1ca0ebf0f3139021", size = 510720, upload-time = "2025-10-19T22:42:34.633Z" }, - { url = "https://files.pythonhosted.org/packages/53/b0/a4b03ff5d00f563cc7546b933c28cb3f2a07344b2aec5834e874f7d44143/fastuuid-0.14.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:9bd57289daf7b153bfa3e8013446aa144ce5e8c825e9e366d455155ede5ea2dc", size = 262024, upload-time = "2025-10-19T22:30:25.482Z" }, - { url = 
"https://files.pythonhosted.org/packages/9c/6d/64aee0a0f6a58eeabadd582e55d0d7d70258ffdd01d093b30c53d668303b/fastuuid-0.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ac60fc860cdf3c3f327374db87ab8e064c86566ca8c49d2e30df15eda1b0c2d5", size = 251679, upload-time = "2025-10-19T22:36:14.096Z" }, - { url = "https://files.pythonhosted.org/packages/60/f5/a7e9cda8369e4f7919d36552db9b2ae21db7915083bc6336f1b0082c8b2e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab32f74bd56565b186f036e33129da77db8be09178cd2f5206a5d4035fb2a23f", size = 277862, upload-time = "2025-10-19T22:36:23.302Z" }, - { url = "https://files.pythonhosted.org/packages/f0/d3/8ce11827c783affffd5bd4d6378b28eb6cc6d2ddf41474006b8d62e7448e/fastuuid-0.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33e678459cf4addaedd9936bbb038e35b3f6b2061330fd8f2f6a1d80414c0f87", size = 278278, upload-time = "2025-10-19T22:29:43.809Z" }, - { url = "https://files.pythonhosted.org/packages/a2/51/680fb6352d0bbade04036da46264a8001f74b7484e2fd1f4da9e3db1c666/fastuuid-0.14.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1e3cc56742f76cd25ecb98e4b82a25f978ccffba02e4bdce8aba857b6d85d87b", size = 301788, upload-time = "2025-10-19T22:36:06.825Z" }, - { url = "https://files.pythonhosted.org/packages/fa/7c/2014b5785bd8ebdab04ec857635ebd84d5ee4950186a577db9eff0fb8ff6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cb9a030f609194b679e1660f7e32733b7a0f332d519c5d5a6a0a580991290022", size = 459819, upload-time = "2025-10-19T22:35:31.623Z" }, - { url = "https://files.pythonhosted.org/packages/01/d2/524d4ceeba9160e7a9bc2ea3e8f4ccf1ad78f3bde34090ca0c51f09a5e91/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:09098762aad4f8da3a888eb9ae01c84430c907a297b97166b8abc07b640f2995", size = 478546, upload-time = "2025-10-19T22:26:03.023Z" }, - { url = 
"https://files.pythonhosted.org/packages/bc/17/354d04951ce114bf4afc78e27a18cfbd6ee319ab1829c2d5fb5e94063ac6/fastuuid-0.14.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1383fff584fa249b16329a059c68ad45d030d5a4b70fb7c73a08d98fd53bcdab", size = 450921, upload-time = "2025-10-19T22:31:02.151Z" }, - { url = "https://files.pythonhosted.org/packages/fb/be/d7be8670151d16d88f15bb121c5b66cdb5ea6a0c2a362d0dcf30276ade53/fastuuid-0.14.0-cp313-cp313-win32.whl", hash = "sha256:a0809f8cc5731c066c909047f9a314d5f536c871a7a22e815cc4967c110ac9ad", size = 154559, upload-time = "2025-10-19T22:36:36.011Z" }, - { url = "https://files.pythonhosted.org/packages/22/1d/5573ef3624ceb7abf4a46073d3554e37191c868abc3aecd5289a72f9810a/fastuuid-0.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:0df14e92e7ad3276327631c9e7cec09e32572ce82089c55cb1bb8df71cf394ed", size = 156539, upload-time = "2025-10-19T22:33:35.898Z" }, - { url = "https://files.pythonhosted.org/packages/16/c9/8c7660d1fe3862e3f8acabd9be7fc9ad71eb270f1c65cce9a2b7a31329ab/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b852a870a61cfc26c884af205d502881a2e59cc07076b60ab4a951cc0c94d1ad", size = 510600, upload-time = "2025-10-19T22:43:44.17Z" }, - { url = "https://files.pythonhosted.org/packages/4c/f4/a989c82f9a90d0ad995aa957b3e572ebef163c5299823b4027986f133dfb/fastuuid-0.14.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:c7502d6f54cd08024c3ea9b3514e2d6f190feb2f46e6dbcd3747882264bb5f7b", size = 262069, upload-time = "2025-10-19T22:43:38.38Z" }, - { url = "https://files.pythonhosted.org/packages/da/6c/a1a24f73574ac995482b1326cf7ab41301af0fabaa3e37eeb6b3df00e6e2/fastuuid-0.14.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ca61b592120cf314cfd66e662a5b54a578c5a15b26305e1b8b618a6f22df714", size = 251543, upload-time = "2025-10-19T22:32:22.537Z" }, - { url = 
"https://files.pythonhosted.org/packages/1a/20/2a9b59185ba7a6c7b37808431477c2d739fcbdabbf63e00243e37bd6bf49/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa75b6657ec129d0abded3bec745e6f7ab642e6dba3a5272a68247e85f5f316f", size = 277798, upload-time = "2025-10-19T22:33:53.821Z" }, - { url = "https://files.pythonhosted.org/packages/ef/33/4105ca574f6ded0af6a797d39add041bcfb468a1255fbbe82fcb6f592da2/fastuuid-0.14.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8a0dfea3972200f72d4c7df02c8ac70bad1bb4c58d7e0ec1e6f341679073a7f", size = 278283, upload-time = "2025-10-19T22:29:02.812Z" }, - { url = "https://files.pythonhosted.org/packages/fe/8c/fca59f8e21c4deb013f574eae05723737ddb1d2937ce87cb2a5d20992dc3/fastuuid-0.14.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1bf539a7a95f35b419f9ad105d5a8a35036df35fdafae48fb2fd2e5f318f0d75", size = 301627, upload-time = "2025-10-19T22:35:54.985Z" }, - { url = "https://files.pythonhosted.org/packages/cb/e2/f78c271b909c034d429218f2798ca4e89eeda7983f4257d7865976ddbb6c/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:9a133bf9cc78fdbd1179cb58a59ad0100aa32d8675508150f3658814aeefeaa4", size = 459778, upload-time = "2025-10-19T22:28:00.999Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f0/5ff209d865897667a2ff3e7a572267a9ced8f7313919f6d6043aed8b1caa/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_i686.whl", hash = "sha256:f54d5b36c56a2d5e1a31e73b950b28a0d83eb0c37b91d10408875a5a29494bad", size = 478605, upload-time = "2025-10-19T22:36:21.764Z" }, - { url = "https://files.pythonhosted.org/packages/e0/c8/2ce1c78f983a2c4987ea865d9516dbdfb141a120fd3abb977ae6f02ba7ca/fastuuid-0.14.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:ec27778c6ca3393ef662e2762dba8af13f4ec1aaa32d08d77f71f2a70ae9feb8", size = 450837, upload-time = "2025-10-19T22:34:37.178Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/60/dad662ec9a33b4a5fe44f60699258da64172c39bd041da2994422cdc40fe/fastuuid-0.14.0-cp314-cp314-win32.whl", hash = "sha256:e23fc6a83f112de4be0cc1990e5b127c27663ae43f866353166f87df58e73d06", size = 154532, upload-time = "2025-10-19T22:35:18.217Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f6/da4db31001e854025ffd26bc9ba0740a9cbba2c3259695f7c5834908b336/fastuuid-0.14.0-cp314-cp314-win_amd64.whl", hash = "sha256:df61342889d0f5e7a32f7284e55ef95103f2110fee433c2ae7c2c0956d76ac8a", size = 156457, upload-time = "2025-10-19T22:33:44.579Z" }, -] - [[package]] name = "filelock" version = "3.24.3" @@ -1576,55 +1335,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" }, ] -[[package]] -name = "fonttools" -version = "4.62.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9a/08/7012b00a9a5874311b639c3920270c36ee0c445b69d9989a85e5c92ebcb0/fonttools-4.62.1.tar.gz", hash = "sha256:e54c75fd6041f1122476776880f7c3c3295ffa31962dc6ebe2543c00dca58b5d", size = 3580737, upload-time = "2026-03-13T13:54:25.52Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/39/23ff32561ec8d45a4d48578b4d241369d9270dc50926c017570e60893701/fonttools-4.62.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:40975849bac44fb0b9253d77420c6d8b523ac4dcdcefeff6e4d706838a5b80f7", size = 2871039, upload-time = "2026-03-13T13:52:33.127Z" }, - { url = "https://files.pythonhosted.org/packages/24/7f/66d3f8a9338a9b67fe6e1739f47e1cd5cee78bd3bc1206ef9b0b982289a5/fonttools-4.62.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9dde91633f77fa576879a0c76b1d89de373cae751a98ddf0109d54e173b40f14", size = 2416346, upload-time = 
"2026-03-13T13:52:35.676Z" }, - { url = "https://files.pythonhosted.org/packages/aa/53/5276ceba7bff95da7793a07c5284e1da901cf00341ce5e2f3273056c0cca/fonttools-4.62.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6acb4109f8bee00fec985c8c7afb02299e35e9c94b57287f3ea542f28bd0b0a7", size = 5100897, upload-time = "2026-03-13T13:52:38.102Z" }, - { url = "https://files.pythonhosted.org/packages/cc/a1/40a5c4d8e28b0851d53a8eeeb46fbd73c325a2a9a165f290a5ed90e6c597/fonttools-4.62.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1c5c25671ce8805e0d080e2ffdeca7f1e86778c5cbfbeae86d7f866d8830517b", size = 5071078, upload-time = "2026-03-13T13:52:41.305Z" }, - { url = "https://files.pythonhosted.org/packages/e3/be/d378fca4c65ea1956fee6d90ace6e861776809cbbc5af22388a090c3c092/fonttools-4.62.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a5d8825e1140f04e6c99bb7d37a9e31c172f3bc208afbe02175339e699c710e1", size = 5076908, upload-time = "2026-03-13T13:52:44.122Z" }, - { url = "https://files.pythonhosted.org/packages/f8/d9/ae6a1d0693a4185a84605679c8a1f719a55df87b9c6e8e817bfdd9ef5936/fonttools-4.62.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:268abb1cb221e66c014acc234e872b7870d8b5d4657a83a8f4205094c32d2416", size = 5202275, upload-time = "2026-03-13T13:52:46.591Z" }, - { url = "https://files.pythonhosted.org/packages/54/6c/af95d9c4efb15cabff22642b608342f2bd67137eea6107202d91b5b03184/fonttools-4.62.1-cp311-cp311-win32.whl", hash = "sha256:942b03094d7edbb99bdf1ae7e9090898cad7bf9030b3d21f33d7072dbcb51a53", size = 2293075, upload-time = "2026-03-13T13:52:48.711Z" }, - { url = "https://files.pythonhosted.org/packages/d3/97/bf54c5b3f2be34e1f143e6db838dfdc54f2ffa3e68c738934c82f3b2a08d/fonttools-4.62.1-cp311-cp311-win_amd64.whl", hash = "sha256:e8514f4924375f77084e81467e63238b095abda5107620f49421c368a6017ed2", size = 2344593, upload-time = "2026-03-13T13:52:50.725Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/d4/dbacced3953544b9a93088cc10ef2b596d348c983d5c67a404fa41ec51ba/fonttools-4.62.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:90365821debbd7db678809c7491ca4acd1e0779b9624cdc6ddaf1f31992bf974", size = 2870219, upload-time = "2026-03-13T13:52:53.664Z" }, - { url = "https://files.pythonhosted.org/packages/66/9e/a769c8e99b81e5a87ab7e5e7236684de4e96246aae17274e5347d11ebd78/fonttools-4.62.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:12859ff0b47dd20f110804c3e0d0970f7b832f561630cd879969011541a464a9", size = 2414891, upload-time = "2026-03-13T13:52:56.493Z" }, - { url = "https://files.pythonhosted.org/packages/69/64/f19a9e3911968c37e1e620e14dfc5778299e1474f72f4e57c5ec771d9489/fonttools-4.62.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9c125ffa00c3d9003cdaaf7f2c79e6e535628093e14b5de1dccb08859b680936", size = 5033197, upload-time = "2026-03-13T13:52:59.179Z" }, - { url = "https://files.pythonhosted.org/packages/9b/8a/99c8b3c3888c5c474c08dbfd7c8899786de9604b727fcefb055b42c84bba/fonttools-4.62.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:149f7d84afca659d1a97e39a4778794a2f83bf344c5ee5134e09995086cc2392", size = 4988768, upload-time = "2026-03-13T13:53:02.761Z" }, - { url = "https://files.pythonhosted.org/packages/d1/c6/0f904540d3e6ab463c1243a0d803504826a11604c72dd58c2949796a1762/fonttools-4.62.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0aa72c43a601cfa9273bb1ae0518f1acadc01ee181a6fc60cd758d7fdadffc04", size = 4971512, upload-time = "2026-03-13T13:53:05.678Z" }, - { url = "https://files.pythonhosted.org/packages/29/0b/5cbef6588dc9bd6b5c9ad6a4d5a8ca384d0cea089da31711bbeb4f9654a6/fonttools-4.62.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:19177c8d96c7c36359266e571c5173bcee9157b59cfc8cb0153c5673dc5a3a7d", size = 5122723, upload-time = "2026-03-13T13:53:08.662Z" }, - { url = 
"https://files.pythonhosted.org/packages/4a/47/b3a5342d381595ef439adec67848bed561ab7fdb1019fa522e82101b7d9c/fonttools-4.62.1-cp312-cp312-win32.whl", hash = "sha256:a24decd24d60744ee8b4679d38e88b8303d86772053afc29b19d23bb8207803c", size = 2281278, upload-time = "2026-03-13T13:53:10.998Z" }, - { url = "https://files.pythonhosted.org/packages/28/b1/0c2ab56a16f409c6c8a68816e6af707827ad5d629634691ff60a52879792/fonttools-4.62.1-cp312-cp312-win_amd64.whl", hash = "sha256:9e7863e10b3de72376280b515d35b14f5eeed639d1aa7824f4cf06779ec65e42", size = 2331414, upload-time = "2026-03-13T13:53:13.992Z" }, - { url = "https://files.pythonhosted.org/packages/3b/56/6f389de21c49555553d6a5aeed5ac9767631497ac836c4f076273d15bd72/fonttools-4.62.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c22b1014017111c401469e3acc5433e6acf6ebcc6aa9efb538a533c800971c79", size = 2865155, upload-time = "2026-03-13T13:53:16.132Z" }, - { url = "https://files.pythonhosted.org/packages/03/c5/0e3966edd5ec668d41dfe418787726752bc07e2f5fd8c8f208615e61fa89/fonttools-4.62.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:68959f5fc58ed4599b44aad161c2837477d7f35f5f79402d97439974faebfebe", size = 2412802, upload-time = "2026-03-13T13:53:18.878Z" }, - { url = "https://files.pythonhosted.org/packages/52/94/e6ac4b44026de7786fe46e3bfa0c87e51d5d70a841054065d49cd62bb909/fonttools-4.62.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ef46db46c9447103b8f3ff91e8ba009d5fe181b1920a83757a5762551e32bb68", size = 5013926, upload-time = "2026-03-13T13:53:21.379Z" }, - { url = "https://files.pythonhosted.org/packages/e2/98/8b1e801939839d405f1f122e7d175cebe9aeb4e114f95bfc45e3152af9a7/fonttools-4.62.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6706d1cb1d5e6251a97ad3c1b9347505c5615c112e66047abbef0f8545fa30d1", size = 4964575, upload-time = "2026-03-13T13:53:23.857Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/76/7d051671e938b1881670528fec69cc4044315edd71a229c7fd712eaa5119/fonttools-4.62.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2e7abd2b1e11736f58c1de27819e1955a53267c21732e78243fa2fa2e5c1e069", size = 4953693, upload-time = "2026-03-13T13:53:26.569Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ae/b41f8628ec0be3c1b934fc12b84f4576a5c646119db4d3bdd76a217c90b5/fonttools-4.62.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:403d28ce06ebfc547fbcb0cb8b7f7cc2f7a2d3e1a67ba9a34b14632df9e080f9", size = 5094920, upload-time = "2026-03-13T13:53:29.329Z" }, - { url = "https://files.pythonhosted.org/packages/f2/f6/53a1e9469331a23dcc400970a27a4caa3d9f6edbf5baab0260285238b884/fonttools-4.62.1-cp313-cp313-win32.whl", hash = "sha256:93c316e0f5301b2adbe6a5f658634307c096fd5aae60a5b3412e4f3e1728ab24", size = 2279928, upload-time = "2026-03-13T13:53:32.352Z" }, - { url = "https://files.pythonhosted.org/packages/38/60/35186529de1db3c01f5ad625bde07c1f576305eab6d86bbda4c58445f721/fonttools-4.62.1-cp313-cp313-win_amd64.whl", hash = "sha256:7aa21ff53e28a9c2157acbc44e5b401149d3c9178107130e82d74ceb500e5056", size = 2330514, upload-time = "2026-03-13T13:53:34.991Z" }, - { url = "https://files.pythonhosted.org/packages/36/f0/2888cdac391807d68d90dcb16ef858ddc1b5309bfc6966195a459dd326e2/fonttools-4.62.1-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:fa1d16210b6b10a826d71bed68dd9ec24a9e218d5a5e2797f37c573e7ec215ca", size = 2864442, upload-time = "2026-03-13T13:53:37.509Z" }, - { url = "https://files.pythonhosted.org/packages/4b/b2/e521803081f8dc35990816b82da6360fa668a21b44da4b53fc9e77efcd62/fonttools-4.62.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:aa69d10ed420d8121118e628ad47d86e4caa79ba37f968597b958f6cceab7eca", size = 2410901, upload-time = "2026-03-13T13:53:40.55Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/a4/8c3511ff06e53110039358dbbdc1a65d72157a054638387aa2ada300a8b8/fonttools-4.62.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bd13b7999d59c5eb1c2b442eb2d0c427cb517a0b7a1f5798fc5c9e003f5ff782", size = 4999608, upload-time = "2026-03-13T13:53:42.798Z" }, - { url = "https://files.pythonhosted.org/packages/28/63/cd0c3b26afe60995a5295f37c246a93d454023726c3261cfbb3559969bb9/fonttools-4.62.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8d337fdd49a79b0d51c4da87bc38169d21c3abbf0c1aa9367eff5c6656fb6dae", size = 4912726, upload-time = "2026-03-13T13:53:45.405Z" }, - { url = "https://files.pythonhosted.org/packages/70/b9/ac677cb07c24c685cf34f64e140617d58789d67a3dd524164b63648c6114/fonttools-4.62.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d241cdc4a67b5431c6d7f115fdf63335222414995e3a1df1a41e1182acd4bcc7", size = 4951422, upload-time = "2026-03-13T13:53:48.326Z" }, - { url = "https://files.pythonhosted.org/packages/e6/10/11c08419a14b85b7ca9a9faca321accccc8842dd9e0b1c8a72908de05945/fonttools-4.62.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c05557a78f8fa514da0f869556eeda40887a8abc77c76ee3f74cf241778afd5a", size = 5060979, upload-time = "2026-03-13T13:53:51.366Z" }, - { url = "https://files.pythonhosted.org/packages/4e/3c/12eea4a4cf054e7ab058ed5ceada43b46809fce2bf319017c4d63ae55bb4/fonttools-4.62.1-cp314-cp314-win32.whl", hash = "sha256:49a445d2f544ce4a69338694cad575ba97b9a75fff02720da0882d1a73f12800", size = 2283733, upload-time = "2026-03-13T13:53:53.606Z" }, - { url = "https://files.pythonhosted.org/packages/6b/67/74b070029043186b5dd13462c958cb7c7f811be0d2e634309d9a1ffb1505/fonttools-4.62.1-cp314-cp314-win_amd64.whl", hash = "sha256:1eecc128c86c552fb963fe846ca4e011b1be053728f798185a1687502f6d398e", size = 2335663, upload-time = "2026-03-13T13:53:56.23Z" }, - { url = 
"https://files.pythonhosted.org/packages/42/c5/4d2ed3ca6e33617fc5624467da353337f06e7f637707478903c785bd8e20/fonttools-4.62.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:1596aeaddf7f78e21e68293c011316a25267b3effdaccaf4d59bc9159d681b82", size = 2947288, upload-time = "2026-03-13T13:53:59.397Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e9/7ab11ddfda48ed0f89b13380e5595ba572619c27077be0b2c447a63ff351/fonttools-4.62.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:8f8fca95d3bb3208f59626a4b0ea6e526ee51f5a8ad5d91821c165903e8d9260", size = 2449023, upload-time = "2026-03-13T13:54:01.642Z" }, - { url = "https://files.pythonhosted.org/packages/b2/10/a800fa090b5e8819942e54e19b55fc7c21fe14a08757c3aa3ca8db358939/fonttools-4.62.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee91628c08e76f77b533d65feb3fbe6d9dad699f95be51cf0d022db94089cdc4", size = 5137599, upload-time = "2026-03-13T13:54:04.495Z" }, - { url = "https://files.pythonhosted.org/packages/37/dc/8ccd45033fffd74deb6912fa1ca524643f584b94c87a16036855b498a1ed/fonttools-4.62.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5f37df1cac61d906e7b836abe356bc2f34c99d4477467755c216b72aa3dc748b", size = 4920933, upload-time = "2026-03-13T13:54:07.557Z" }, - { url = "https://files.pythonhosted.org/packages/99/eb/e618adefb839598d25ac8136cd577925d6c513dc0d931d93b8af956210f0/fonttools-4.62.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:92bb00a947e666169c99b43753c4305fc95a890a60ef3aeb2a6963e07902cc87", size = 5016232, upload-time = "2026-03-13T13:54:10.611Z" }, - { url = "https://files.pythonhosted.org/packages/d9/5f/9b5c9bfaa8ec82def8d8168c4f13615990d6ce5996fe52bd49bfb5e05134/fonttools-4.62.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:bdfe592802ef939a0e33106ea4a318eeb17822c7ee168c290273cbd5fabd746c", size = 5042987, upload-time = "2026-03-13T13:54:13.569Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/aa/dfbbe24c6a6afc5c203d90cc0343e24bcbb09e76d67c4d6eef8c2558d7ba/fonttools-4.62.1-cp314-cp314t-win32.whl", hash = "sha256:b820fcb92d4655513d8402d5b219f94481c4443d825b4372c75a2072aa4b357a", size = 2348021, upload-time = "2026-03-13T13:54:16.98Z" }, - { url = "https://files.pythonhosted.org/packages/13/6f/ae9c4e4dd417948407b680855c2c7790efb52add6009aaecff1e3bc50e8e/fonttools-4.62.1-cp314-cp314t-win_amd64.whl", hash = "sha256:59b372b4f0e113d3746b88985f1c796e7bf830dd54b28374cd85c2b8acd7583e", size = 2414147, upload-time = "2026-03-13T13:54:19.416Z" }, - { url = "https://files.pythonhosted.org/packages/fd/ba/56147c165442cc5ba7e82ecf301c9a68353cede498185869e6e02b4c264f/fonttools-4.62.1-py3-none-any.whl", hash = "sha256:7487782e2113861f4ddcc07c3436450659e3caa5e470b27dc2177cade2d8e7fd", size = 1152647, upload-time = "2026-03-13T13:54:22.735Z" }, -] - [[package]] name = "frozenlist" version = "1.8.0" @@ -1739,35 +1449,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, ] -[package.optional-dependencies] -http = [ - { name = "aiohttp" }, -] - -[[package]] -name = "gitdb" -version = "4.0.12" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "smmap" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = 
"sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = "2025-01-02T07:20:43.624Z" }, -] - -[[package]] -name = "gitpython" -version = "3.1.46" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "gitdb" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f", size = 215371, upload-time = "2026-01-01T15:37:32.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, -] - [[package]] name = "google-api-core" version = "2.30.3" @@ -1851,53 +1532,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/28/23eea8acd65972bbfe295ce3666b28ac510dfcb115fac089d3edb0feb00a/googleapis_common_protos-1.73.0-py3-none-any.whl", hash = "sha256:dfdaaa2e860f242046be561e6d6cb5c5f1541ae02cfbcb034371aadb2942b4e8", size = 297578, upload-time = "2026-03-06T21:52:33.933Z" }, ] -[[package]] -name = "greenlet" -version = "3.3.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a3/51/1664f6b78fc6ebbd98019a1fd730e83fa78f2db7058f72b1463d3612b8db/greenlet-3.3.2.tar.gz", hash = "sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2", size = 188267, upload-time = "2026-02-20T20:54:15.531Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = 
"2026-02-20T20:19:39.263Z" }, - { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, - { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, - { url = "https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, - { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, - { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, - { url = "https://files.pythonhosted.org/packages/f1/3a/efb2cf697fbccdf75b24e2c18025e7dfa54c4f31fab75c51d0fe79942cef/greenlet-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e692b2dae4cc7077cbb11b47d258533b48c8fde69a33d0d8a82e2fe8d8531d5", size = 230389, upload-time = "2026-02-20T20:17:18.772Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/a1/65bbc059a43a7e2143ec4fc1f9e3f673e04f9c7b371a494a101422ac4fd5/greenlet-3.3.2-cp311-cp311-win_arm64.whl", hash = "sha256:02b0a8682aecd4d3c6c18edf52bc8e51eacdd75c8eac52a790a210b06aa295fd", size = 229645, upload-time = "2026-02-20T20:18:18.695Z" }, - { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, - { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, - { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, - { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, - { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, - { url = "https://files.pythonhosted.org/packages/9b/40/cc802e067d02af8b60b6771cea7d57e21ef5e6659912814babb42b864713/greenlet-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:34308836d8370bddadb41f5a7ce96879b72e2fdfb4e87729330c6ab52376409f", size = 231081, upload-time = "2026-02-20T20:17:28.121Z" }, - { url = "https://files.pythonhosted.org/packages/58/2e/fe7f36ff1982d6b10a60d5e0740c759259a7d6d2e1dc41da6d96de32fff6/greenlet-3.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:d3a62fa76a32b462a97198e4c9e99afb9ab375115e74e9a83ce180e7a496f643", size = 230331, upload-time = "2026-02-20T20:17:23.34Z" }, - { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, - { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, - { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, - { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, - { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, - { url = "https://files.pythonhosted.org/packages/91/39/5ef5aa23bc545aa0d31e1b9b55822b32c8da93ba657295840b6b34124009/greenlet-3.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:a7945dd0eab63ded0a48e4dcade82939783c172290a7903ebde9e184333ca124", size = 230961, upload-time = "2026-02-20T20:16:58.461Z" }, - { url = "https://files.pythonhosted.org/packages/62/6b/a89f8456dcb06becff288f563618e9f20deed8dd29beea14f9a168aef64b/greenlet-3.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:394ead29063ee3515b4e775216cb756b2e3b4a7e55ae8fd884f17fa579e6b327", size = 230221, upload-time = "2026-02-20T20:17:37.152Z" }, - { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, - { url = 
"https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, - { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, - { url = "https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, - { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, - { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, - { url = "https://files.pythonhosted.org/packages/f3/ca/2101ca3d9223a1dc125140dbc063644dca76df6ff356531eb27bc267b446/greenlet-3.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:8c4dd0f3997cf2512f7601563cc90dfb8957c0cff1e3a1b23991d4ea1776c492", size = 232034, upload-time = "2026-02-20T20:20:08.186Z" }, - { url = 
"https://files.pythonhosted.org/packages/f6/4a/ecf894e962a59dea60f04877eea0fd5724618da89f1867b28ee8b91e811f/greenlet-3.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:cd6f9e2bbd46321ba3bbb4c8a15794d32960e3b0ae2cc4d49a1a53d314805d71", size = 231437, upload-time = "2026-02-20T20:18:59.722Z" }, - { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, - { url = "https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, - { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, - { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, - { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, - { url = "https://files.pythonhosted.org/packages/29/4b/45d90626aef8e65336bed690106d1382f7a43665e2249017e9527df8823b/greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a", size = 237086, upload-time = "2026-02-20T20:20:45.786Z" }, -] - [[package]] name = "grpclib" version = "0.4.9" @@ -1911,21 +1545,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5c/90/b0cbbd9efcc82816c58f31a34963071aa19fb792a212a5d9caf8e0fc3097/grpclib-0.4.9-py3-none-any.whl", hash = "sha256:7762ec1c8ed94dfad597475152dd35cbd11aecaaca2f243e29702435ca24cf0e", size = 77063, upload-time = "2025-12-14T22:23:13.224Z" }, ] -[[package]] -name = "gymnasium" -version = "1.2.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cloudpickle" }, - { name = "farama-notifications" }, - { name = "numpy" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/76/59/653a9417d98ed3e29ef9734ba52c3495f6c6823b8d5c0c75369f25111708/gymnasium-1.2.3.tar.gz", hash = "sha256:2b2cb5b5fbbbdf3afb9f38ca952cc48aa6aa3e26561400d940747fda3ad42509", size = 829230, upload-time = "2025-12-18T16:51:10.234Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/56/d3/ea5f088e3638dbab12e5c20d6559d5b3bdaeaa1f2af74e526e6815836285/gymnasium-1.2.3-py3-none-any.whl", hash = "sha256:e6314bba8f549c7fdcc8677f7cd786b64908af6e79b57ddaa5ce1825bffb5373", size = 952113, upload-time = "2025-12-18T16:51:08.445Z" }, -] - [[package]] name = "h11" version = "0.16.0" @@ -2068,6 +1687,7 @@ mcp = [ ] messaging = [ { name = "aiohttp" }, + { name = "brotlicffi" }, { name = "discord-py", extra 
= ["voice"] }, { name = "python-telegram-bot", extra = ["webhooks"] }, { name = "qrcode" }, @@ -2084,13 +1704,6 @@ pty = [ { name = "ptyprocess", marker = "sys_platform != 'win32'" }, { name = "pywinpty", marker = "sys_platform == 'win32'" }, ] -rl = [ - { name = "atroposlib" }, - { name = "fastapi" }, - { name = "tinker" }, - { name = "uvicorn", extra = ["standard"] }, - { name = "wandb" }, -] slack = [ { name = "aiohttp" }, { name = "slack-bolt" }, @@ -2138,9 +1751,6 @@ web = [ { name = "fastapi" }, { name = "uvicorn", extra = ["standard"] }, ] -yc-bench = [ - { name = "yc-bench", marker = "python_full_version >= '3.12'" }, -] youtube = [ { name = "youtube-transcript-api" }, ] @@ -2157,8 +1767,8 @@ requires-dist = [ { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = "==2.2.42" }, { name = "anthropic", marker = "extra == 'anthropic'", specifier = "==0.86.0" }, { name = "asyncpg", marker = "extra == 'matrix'", specifier = "==0.31.0" }, - { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = "==1.42.89" }, + { name = "brotlicffi", marker = "extra == 'messaging'", specifier = "==1.2.0.1" }, { name = "croniter", specifier = "==6.0.0" }, { name = "daytona", marker = "extra == 'daytona'", specifier = "==0.155.0" }, { name = "debugpy", marker = "extra == 'dev'", specifier = "==1.8.20" }, @@ -2168,7 +1778,6 @@ requires-dist = [ { name = "elevenlabs", marker = "extra == 'tts-premium'", specifier = "==1.59.0" }, { name = "exa-py", marker = "extra == 'exa'", specifier = "==2.10.2" }, { name = "fal-client", marker = "extra == 'fal'", specifier = "==0.13.1" }, - { name = "fastapi", marker = "extra == 'rl'", specifier = "==0.133.1" }, { name = "fastapi", marker = "extra == 'web'", specifier = "==0.133.1" }, { name = "faster-whisper", marker = "extra == 'voice'", specifier = "==1.2.1" }, { name = 
"fire", specifier = "==0.7.1" }, @@ -2240,49 +1849,13 @@ requires-dist = [ { name = "slack-sdk", marker = "extra == 'slack'", specifier = "==3.40.1" }, { name = "sounddevice", marker = "extra == 'voice'", specifier = "==0.5.5" }, { name = "tenacity", specifier = "==9.1.4" }, - { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b" }, { name = "ty", marker = "extra == 'dev'", specifier = "==0.0.21" }, { name = "tzdata", marker = "sys_platform == 'win32'", specifier = "==2025.3" }, - { name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = "==0.41.0" }, { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = "==0.41.0" }, { name = "vercel", marker = "extra == 'vercel'", specifier = "==0.5.7" }, - { name = "wandb", marker = "extra == 'rl'", specifier = "==0.25.1" }, - { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" }, { name = "youtube-transcript-api", marker = "extra == 'youtube'", specifier = "==1.2.4" }, ] -provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "rl", "yc-bench", "all"] - -[[package]] -name = "hf-transfer" -version = "0.1.9" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1a/eb/8fc64f40388c29ce8ce3b2b180a089d4d6b25b1d0d232d016704cb852104/hf_transfer-0.1.9.tar.gz", hash = "sha256:035572865dab29d17e783fbf1e84cf1cb24f3fcf8f1b17db1cfc7fdf139f02bf", size = 25201, upload-time = "2025-01-07T10:05:12.947Z" } -wheels = 
[ - { url = "https://files.pythonhosted.org/packages/a4/78/0dce00208f585fae675f40033ef9a30dedfa83665d5ac79f16beb4a0a6c2/hf_transfer-0.1.9-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:6e94e8822da79573c9b6ae4d6b2f847c59a7a06c5327d7db20751b68538dc4f6", size = 1386084, upload-time = "2025-01-07T10:04:47.874Z" }, - { url = "https://files.pythonhosted.org/packages/ea/2e/3d60b1a9e9f29a2152aa66c823bf5e399ae7be3fef310ff0de86779c5d2d/hf_transfer-0.1.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ebc4ab9023414880c8b1d3c38174d1c9989eb5022d37e814fa91a3060123eb0", size = 1343558, upload-time = "2025-01-07T10:04:42.313Z" }, - { url = "https://files.pythonhosted.org/packages/fb/38/130a5ac3747f104033591bcac1c961cb1faadfdc91704f59b09c0b465ff2/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8674026f21ed369aa2a0a4b46000aca850fc44cd2b54af33a172ce5325b4fc82", size = 3726676, upload-time = "2025-01-07T10:04:11.539Z" }, - { url = "https://files.pythonhosted.org/packages/15/a1/f4e27c5ad17aac616ae0849e2aede5aae31db8267a948c6b3eeb9fd96446/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a736dfbb2c84f5a2c975478ad200c0c8bfcb58a25a35db402678fb87ce17fa4", size = 3062920, upload-time = "2025-01-07T10:04:16.297Z" }, - { url = "https://files.pythonhosted.org/packages/8d/0d/727abdfba39bc3f1132cfa4c970588c2c0bb0d82fe2d645cc10f4e2f8e0b/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:504b8427fd785dd8546d53b9fafe6e436bd7a3adf76b9dce556507650a7b4567", size = 3578681, upload-time = "2025-01-07T10:04:29.702Z" }, - { url = "https://files.pythonhosted.org/packages/50/d0/2b213eb1ea8b1252ccaf1a6c804d0aba03fea38aae4124df6a3acb70511a/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c7fc1b85f4d0f76e452765d7648c9f4bfd0aedb9ced2ae1ebfece2d8cfaf8e2", size = 3398837, upload-time = "2025-01-07T10:04:22.778Z" }, - { 
url = "https://files.pythonhosted.org/packages/8c/8a/79dbce9006e0bd6b74516f97451a7b7c64dbbb426df15d901dd438cfeee3/hf_transfer-0.1.9-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d991376f0eac70a60f0cbc95602aa708a6f7c8617f28b4945c1431d67b8e3c8", size = 3546986, upload-time = "2025-01-07T10:04:36.415Z" }, - { url = "https://files.pythonhosted.org/packages/a9/f7/9ac239b6ee6fe0bad130325d987a93ea58c4118e50479f0786f1733b37e8/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e6ac4eddcd99575ed3735ed911ddf9d1697e2bd13aa3f0ad7e3904dd4863842e", size = 4071715, upload-time = "2025-01-07T10:04:53.224Z" }, - { url = "https://files.pythonhosted.org/packages/d8/a3/0ed697279f5eeb7a40f279bd783cf50e6d0b91f24120dcf66ef2cf8822b4/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:57fd9880da1ee0f47250f735f791fab788f0aa1ee36afc49f761349869c8b4d9", size = 3388081, upload-time = "2025-01-07T10:04:57.818Z" }, - { url = "https://files.pythonhosted.org/packages/dc/eb/47e477bdf1d784f31c7540db6cc8c354b777e51a186897a7abda34517f36/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:5d561f0520f493c66b016d99ceabe69c23289aa90be38dd802d2aef279f15751", size = 3658654, upload-time = "2025-01-07T10:05:03.168Z" }, - { url = "https://files.pythonhosted.org/packages/45/07/6661e43fbee09594a8a5e9bb778107d95fe38dac4c653982afe03d32bd4d/hf_transfer-0.1.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a5b366d34cd449fe9b20ef25941e6eef0460a2f74e7389f02e673e1f88ebd538", size = 3690551, upload-time = "2025-01-07T10:05:09.238Z" }, - { url = "https://files.pythonhosted.org/packages/81/f5/461d2e5f307e5048289b1168d5c642ae3bb2504e88dff1a38b92ed990a21/hf_transfer-0.1.9-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e66acf91df4a8b72f60223059df3003062a5ae111757187ed1a06750a30e911b", size = 1393046, upload-time = "2025-01-07T10:04:51.003Z" }, - { url = 
"https://files.pythonhosted.org/packages/41/ba/8d9fd9f1083525edfcb389c93738c802f3559cb749324090d7109c8bf4c2/hf_transfer-0.1.9-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:8669dbcc7a3e2e8d61d42cd24da9c50d57770bd74b445c65123291ca842a7e7a", size = 1348126, upload-time = "2025-01-07T10:04:45.712Z" }, - { url = "https://files.pythonhosted.org/packages/8e/a2/cd7885bc9959421065a6fae0fe67b6c55becdeda4e69b873e52976f9a9f0/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fd0167c4407a3bc4cdd0307e65ada2294ec04f1813d8a69a5243e379b22e9d8", size = 3728604, upload-time = "2025-01-07T10:04:14.173Z" }, - { url = "https://files.pythonhosted.org/packages/f6/2e/a072cf196edfeda3310c9a5ade0a0fdd785e6154b3ce24fc738c818da2a7/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ee8b10afedcb75f71091bcc197c526a6ebf5c58bbbadb34fdeee6160f55f619f", size = 3064995, upload-time = "2025-01-07T10:04:18.663Z" }, - { url = "https://files.pythonhosted.org/packages/c2/84/aec9ef4c0fab93c1ea2b1badff38c78b4b2f86f0555b26d2051dbc920cde/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5828057e313de59300dd1abb489444bc452efe3f479d3c55b31a8f680936ba42", size = 3580908, upload-time = "2025-01-07T10:04:32.834Z" }, - { url = "https://files.pythonhosted.org/packages/29/63/b560d39651a56603d64f1a0212d0472a44cbd965db2fa62b99d99cb981bf/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc6bd19e1cc177c66bdef15ef8636ad3bde79d5a4f608c158021153b4573509d", size = 3400839, upload-time = "2025-01-07T10:04:26.122Z" }, - { url = "https://files.pythonhosted.org/packages/d6/d8/f87ea6f42456254b48915970ed98e993110521e9263472840174d32c880d/hf_transfer-0.1.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdca9bfb89e6f8f281890cc61a8aff2d3cecaff7e1a4d275574d96ca70098557", size = 3552664, upload-time = "2025-01-07T10:04:40.123Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/56/1267c39b65fc8f4e2113b36297320f102718bf5799b544a6cbe22013aa1d/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:89a23f58b7b7effbc047b8ca286f131b17728c99a9f972723323003ffd1bb916", size = 4073732, upload-time = "2025-01-07T10:04:55.624Z" }, - { url = "https://files.pythonhosted.org/packages/82/1a/9c748befbe3decf7cb415e34f8a0c3789a0a9c55910dea73d581e48c0ce5/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:dc7fff1345980d6c0ebb92c811d24afa4b98b3e07ed070c8e38cc91fd80478c5", size = 3390096, upload-time = "2025-01-07T10:04:59.98Z" }, - { url = "https://files.pythonhosted.org/packages/72/85/4c03da147b6b4b7cb12e074d3d44eee28604a387ed0eaf7eaaead5069c57/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:1a6bd16c667ebe89a069ca163060127a794fa3a3525292c900b8c8cc47985b0d", size = 3664743, upload-time = "2025-01-07T10:05:05.416Z" }, - { url = "https://files.pythonhosted.org/packages/e7/6e/e597b04f753f1b09e6893075d53a82a30c13855cbaa791402695b01e369f/hf_transfer-0.1.9-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d2fde99d502093ade3ab1b53f80da18480e9902aa960dab7f74fb1b9e5bc5746", size = 3695243, upload-time = "2025-01-07T10:05:11.411Z" }, - { url = "https://files.pythonhosted.org/packages/09/89/d4e234727a26b2546c8fb70a276cd924260d60135f2165bf8b9ed67bb9a4/hf_transfer-0.1.9-cp38-abi3-win32.whl", hash = "sha256:435cc3cdc8524ce57b074032b8fd76eed70a4224d2091232fa6a8cef8fd6803e", size = 1086605, upload-time = "2025-01-07T10:05:18.873Z" }, - { url = "https://files.pythonhosted.org/packages/a1/14/f1e15b851d1c2af5b0b1a82bf8eb10bda2da62d98180220ba6fd8879bb5b/hf_transfer-0.1.9-cp38-abi3-win_amd64.whl", hash = "sha256:16f208fc678911c37e11aa7b586bc66a37d02e636208f18b6bc53d29b5df40ad", size = 1160240, upload-time = "2025-01-07T10:05:14.324Z" }, -] +provides-extras = ["anthropic", "exa", "firecrawl", "parallel-web", "fal", "edge-tts", "modal", "daytona", "vercel", "hindsight", "dev", 
"messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "computer-use", "acp", "bedrock", "termux", "termux-all", "dingtalk", "feishu", "google", "youtube", "web", "all"] [[package]] name = "hf-xet" @@ -2433,9 +2006,6 @@ wheels = [ ] [package.optional-dependencies] -http2 = [ - { name = "h2" }, -] socks = [ { name = "socksio" }, ] @@ -2615,27 +2185,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, ] -[[package]] -name = "joblib" -version = "1.5.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/f2/d34e8b3a08a9cc79a50b2208a93dce981fe615b64d5a4d4abee421d898df/joblib-1.5.3.tar.gz", hash = "sha256:8561a3269e6801106863fd0d6d84bb737be9e7631e33aaed3fb9ce5953688da3", size = 331603, upload-time = "2025-12-15T08:41:46.427Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, -] - -[[package]] -name = "jsonlines" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/35/87/bcda8e46c88d0e34cad2f09ee2d0c7f5957bccdb9791b0b934ec84d84be4/jsonlines-4.0.0.tar.gz", hash = "sha256:0c6d2c09117550c089995247f605ae4cf77dd1533041d366351f6f298822ea74", size = 11359, upload-time = "2023-09-01T12:34:44.187Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/f8/62/d9ba6323b9202dd2fe166beab8a86d29465c41a0288cbe229fac60c1ab8d/jsonlines-4.0.0-py3-none-any.whl", hash = "sha256:185b334ff2ca5a91362993f42e83588a360cf95ce4b71a73548502bda52a7c55", size = 8701, upload-time = "2023-09-01T12:34:42.563Z" }, -] - [[package]] name = "jsonschema" version = "4.26.0" @@ -2663,112 +2212,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] -[[package]] -name = "kiwisolver" -version = "1.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/67/9c61eccb13f0bdca9307614e782fec49ffdde0f7a2314935d489fa93cd9c/kiwisolver-1.5.0.tar.gz", hash = "sha256:d4193f3d9dc3f6f79aaed0e5637f45d98850ebf01f7ca20e69457f3e8946b66a", size = 103482, upload-time = "2026-03-09T13:15:53.382Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/12/dd/a495a9c104be1c476f0386e714252caf2b7eca883915422a64c50b88c6f5/kiwisolver-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9eed0f7edbb274413b6ee781cca50541c8c0facd3d6fd289779e494340a2b85c", size = 122798, upload-time = "2026-03-09T13:12:58.963Z" }, - { url = "https://files.pythonhosted.org/packages/11/60/37b4047a2af0cf5ef6d8b4b26e91829ae6fc6a2d1f74524bcb0e7cd28a32/kiwisolver-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c4923e404d6bcd91b6779c009542e5647fef32e4a5d75e115e3bbac6f2335eb", size = 66216, upload-time = "2026-03-09T13:13:00.155Z" }, - { url = "https://files.pythonhosted.org/packages/0a/aa/510dc933d87767584abfe03efa445889996c70c2990f6f87c3ebaa0a18c5/kiwisolver-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0df54df7e686afa55e6f21fb86195224a6d9beb71d637e8d7920c95cf0f89aac", size = 63911, upload-time 
= "2026-03-09T13:13:01.671Z" }, - { url = "https://files.pythonhosted.org/packages/80/46/bddc13df6c2a40741e0cc7865bb1c9ed4796b6760bd04ce5fae3928ef917/kiwisolver-1.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2517e24d7315eb51c10664cdb865195df38ab74456c677df67bb47f12d088a27", size = 1438209, upload-time = "2026-03-09T13:13:03.385Z" }, - { url = "https://files.pythonhosted.org/packages/fd/d6/76621246f5165e5372f02f5e6f3f48ea336a8f9e96e43997d45b240ed8cd/kiwisolver-1.5.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff710414307fefa903e0d9bdf300972f892c23477829f49504e59834f4195398", size = 1248888, upload-time = "2026-03-09T13:13:05.231Z" }, - { url = "https://files.pythonhosted.org/packages/b2/c1/31559ec6fb39a5b48035ce29bb63ade628f321785f38c384dee3e2c08bc1/kiwisolver-1.5.0-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6176c1811d9d5a04fa391c490cc44f451e240697a16977f11c6f722efb9041db", size = 1266304, upload-time = "2026-03-09T13:13:06.743Z" }, - { url = "https://files.pythonhosted.org/packages/5e/ef/1cb8276f2d29cc6a41e0a042f27946ca347d3a4a75acf85d0a16aa6dcc82/kiwisolver-1.5.0-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50847dca5d197fcbd389c805aa1a1cf32f25d2e7273dc47ab181a517666b68cc", size = 1319650, upload-time = "2026-03-09T13:13:08.607Z" }, - { url = "https://files.pythonhosted.org/packages/4c/e4/5ba3cecd7ce6236ae4a80f67e5d5531287337d0e1f076ca87a5abe4cd5d0/kiwisolver-1.5.0-cp311-cp311-manylinux_2_39_riscv64.whl", hash = "sha256:01808c6d15f4c3e8559595d6d1fe6411c68e4a3822b4b9972b44473b24f4e679", size = 970949, upload-time = "2026-03-09T13:13:10.299Z" }, - { url = "https://files.pythonhosted.org/packages/5a/69/dc61f7ae9a2f071f26004ced87f078235b5507ab6e5acd78f40365655034/kiwisolver-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f1f9f4121ec58628c96baa3de1a55a4e3a333c5102c8e94b64e23bf7b2083309", size = 2199125, upload-time = 
"2026-03-09T13:13:11.841Z" }, - { url = "https://files.pythonhosted.org/packages/e5/7b/abbe0f1b5afa85f8d084b73e90e5f801c0939eba16ac2e49af7c61a6c28d/kiwisolver-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:b7d335370ae48a780c6e6a6bbfa97342f563744c39c35562f3f367665f5c1de2", size = 2293783, upload-time = "2026-03-09T13:13:14.399Z" }, - { url = "https://files.pythonhosted.org/packages/8a/80/5908ae149d96d81580d604c7f8aefd0e98f4fd728cf172f477e9f2a81744/kiwisolver-1.5.0-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:800ee55980c18545af444d93fdd60c56b580db5cc54867d8cbf8a1dc0829938c", size = 1960726, upload-time = "2026-03-09T13:13:16.047Z" }, - { url = "https://files.pythonhosted.org/packages/84/08/a78cb776f8c085b7143142ce479859cfec086bd09ee638a317040b6ef420/kiwisolver-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c438f6ca858697c9ab67eb28246c92508af972e114cac34e57a6d4ba17a3ac08", size = 2464738, upload-time = "2026-03-09T13:13:17.897Z" }, - { url = "https://files.pythonhosted.org/packages/b1/e1/65584da5356ed6cb12c63791a10b208860ac40a83de165cb6a6751a686e3/kiwisolver-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8c63c91f95173f9c2a67c7c526b2cea976828a0e7fced9cdcead2802dc10f8a4", size = 2270718, upload-time = "2026-03-09T13:13:19.421Z" }, - { url = "https://files.pythonhosted.org/packages/be/6c/28f17390b62b8f2f520e2915095b3c94d88681ecf0041e75389d9667f202/kiwisolver-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:beb7f344487cdcb9e1efe4b7a29681b74d34c08f0043a327a74da852a6749e7b", size = 73480, upload-time = "2026-03-09T13:13:20.818Z" }, - { url = "https://files.pythonhosted.org/packages/d8/0e/2ee5debc4f77a625778fec5501ff3e8036fe361b7ee28ae402a485bb9694/kiwisolver-1.5.0-cp311-cp311-win_arm64.whl", hash = "sha256:ad4ae4ffd1ee9cd11357b4c66b612da9888f4f4daf2f36995eda64bd45370cac", size = 64930, upload-time = "2026-03-09T13:13:21.997Z" }, - { url = 
"https://files.pythonhosted.org/packages/4d/b2/818b74ebea34dabe6d0c51cb1c572e046730e64844da6ed646d5298c40ce/kiwisolver-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4e9750bc21b886308024f8a54ccb9a2cc38ac9fa813bf4348434e3d54f337ff9", size = 123158, upload-time = "2026-03-09T13:13:23.127Z" }, - { url = "https://files.pythonhosted.org/packages/bf/d9/405320f8077e8e1c5c4bd6adc45e1e6edf6d727b6da7f2e2533cf58bff71/kiwisolver-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:72ec46b7eba5b395e0a7b63025490d3214c11013f4aacb4f5e8d6c3041829588", size = 66388, upload-time = "2026-03-09T13:13:24.765Z" }, - { url = "https://files.pythonhosted.org/packages/99/9f/795fedf35634f746151ca8839d05681ceb6287fbed6cc1c9bf235f7887c2/kiwisolver-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ed3a984b31da7481b103f68776f7128a89ef26ed40f4dc41a2223cda7fb24819", size = 64068, upload-time = "2026-03-09T13:13:25.878Z" }, - { url = "https://files.pythonhosted.org/packages/c4/13/680c54afe3e65767bed7ec1a15571e1a2f1257128733851ade24abcefbcc/kiwisolver-1.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb5136fb5352d3f422df33f0c879a1b0c204004324150cc3b5e3c4f310c9049f", size = 1477934, upload-time = "2026-03-09T13:13:27.166Z" }, - { url = "https://files.pythonhosted.org/packages/c8/2f/cebfcdb60fd6a9b0f6b47a9337198bcbad6fbe15e68189b7011fd914911f/kiwisolver-1.5.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b2af221f268f5af85e776a73d62b0845fc8baf8ef0abfae79d29c77d0e776aaf", size = 1278537, upload-time = "2026-03-09T13:13:28.707Z" }, - { url = "https://files.pythonhosted.org/packages/f2/0d/9b782923aada3fafb1d6b84e13121954515c669b18af0c26e7d21f579855/kiwisolver-1.5.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b0f172dc8ffaccb8522d7c5d899de00133f2f1ca7b0a49b7da98e901de87bf2d", size = 1296685, upload-time = "2026-03-09T13:13:30.528Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/70/83241b6634b04fe44e892688d5208332bde130f38e610c0418f9ede47ded/kiwisolver-1.5.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6ab8ba9152203feec73758dad83af9a0bbe05001eb4639e547207c40cfb52083", size = 1346024, upload-time = "2026-03-09T13:13:32.818Z" }, - { url = "https://files.pythonhosted.org/packages/e4/db/30ed226fb271ae1a6431fc0fe0edffb2efe23cadb01e798caeb9f2ceae8f/kiwisolver-1.5.0-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:cdee07c4d7f6d72008d3f73b9bf027f4e11550224c7c50d8df1ae4a37c1402a6", size = 987241, upload-time = "2026-03-09T13:13:34.435Z" }, - { url = "https://files.pythonhosted.org/packages/ec/bd/c314595208e4c9587652d50959ead9e461995389664e490f4dce7ff0f782/kiwisolver-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7c60d3c9b06fb23bd9c6139281ccbdc384297579ae037f08ae90c69f6845c0b1", size = 2227742, upload-time = "2026-03-09T13:13:36.4Z" }, - { url = "https://files.pythonhosted.org/packages/c1/43/0499cec932d935229b5543d073c2b87c9c22846aab48881e9d8d6e742a2d/kiwisolver-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:e315e5ec90d88e140f57696ff85b484ff68bb311e36f2c414aa4286293e6dee0", size = 2323966, upload-time = "2026-03-09T13:13:38.204Z" }, - { url = "https://files.pythonhosted.org/packages/3d/6f/79b0d760907965acfd9d61826a3d41f8f093c538f55cd2633d3f0db269f6/kiwisolver-1.5.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:1465387ac63576c3e125e5337a6892b9e99e0627d52317f3ca79e6930d889d15", size = 1977417, upload-time = "2026-03-09T13:13:39.966Z" }, - { url = "https://files.pythonhosted.org/packages/ab/31/01d0537c41cb75a551a438c3c7a80d0c60d60b81f694dac83dd436aec0d0/kiwisolver-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:530a3fd64c87cffa844d4b6b9768774763d9caa299e9b75d8eca6a4423b31314", size = 2491238, upload-time = "2026-03-09T13:13:41.698Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/34/8aefdd0be9cfd00a44509251ba864f5caf2991e36772e61c408007e7f417/kiwisolver-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1d9daea4ea6b9be74fe2f01f7fbade8d6ffab263e781274cffca0dba9be9eec9", size = 2294947, upload-time = "2026-03-09T13:13:43.343Z" }, - { url = "https://files.pythonhosted.org/packages/ad/cf/0348374369ca588f8fe9c338fae49fa4e16eeb10ffb3d012f23a54578a9e/kiwisolver-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:f18c2d9782259a6dc132fdc7a63c168cbc74b35284b6d75c673958982a378384", size = 73569, upload-time = "2026-03-09T13:13:45.792Z" }, - { url = "https://files.pythonhosted.org/packages/28/26/192b26196e2316e2bd29deef67e37cdf9870d9af8e085e521afff0fed526/kiwisolver-1.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:f7c7553b13f69c1b29a5bde08ddc6d9d0c8bfb84f9ed01c30db25944aeb852a7", size = 64997, upload-time = "2026-03-09T13:13:46.878Z" }, - { url = "https://files.pythonhosted.org/packages/9d/69/024d6711d5ba575aa65d5538042e99964104e97fa153a9f10bc369182bc2/kiwisolver-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:fd40bb9cd0891c4c3cb1ddf83f8bbfa15731a248fdc8162669405451e2724b09", size = 123166, upload-time = "2026-03-09T13:13:48.032Z" }, - { url = "https://files.pythonhosted.org/packages/ce/48/adbb40df306f587054a348831220812b9b1d787aff714cfbc8556e38fccd/kiwisolver-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c0e1403fd7c26d77c1f03e096dc58a5c726503fa0db0456678b8668f76f521e3", size = 66395, upload-time = "2026-03-09T13:13:49.365Z" }, - { url = "https://files.pythonhosted.org/packages/a8/3a/d0a972b34e1c63e2409413104216cd1caa02c5a37cb668d1687d466c1c45/kiwisolver-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:dda366d548e89a90d88a86c692377d18d8bd64b39c1fb2b92cb31370e2896bbd", size = 64065, upload-time = "2026-03-09T13:13:50.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/0a/7b98e1e119878a27ba8618ca1e18b14f992ff1eda40f47bccccf4de44121/kiwisolver-1.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:332b4f0145c30b5f5ad9374881133e5aa64320428a57c2c2b61e9d891a51c2f3", size = 1477903, upload-time = "2026-03-09T13:13:52.084Z" }, - { url = "https://files.pythonhosted.org/packages/18/d8/55638d89ffd27799d5cc3d8aa28e12f4ce7a64d67b285114dbedc8ea4136/kiwisolver-1.5.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0c50b89ffd3e1a911c69a1dd3de7173c0cd10b130f56222e57898683841e4f96", size = 1278751, upload-time = "2026-03-09T13:13:54.673Z" }, - { url = "https://files.pythonhosted.org/packages/b8/97/b4c8d0d18421ecceba20ad8701358453b88e32414e6f6950b5a4bad54e65/kiwisolver-1.5.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4db576bb8c3ef9365f8b40fe0f671644de6736ae2c27a2c62d7d8a1b4329f099", size = 1296793, upload-time = "2026-03-09T13:13:56.287Z" }, - { url = "https://files.pythonhosted.org/packages/c4/10/f862f94b6389d8957448ec9df59450b81bec4abb318805375c401a1e6892/kiwisolver-1.5.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0b85aad90cea8ac6797a53b5d5f2e967334fa4d1149f031c4537569972596cb8", size = 1346041, upload-time = "2026-03-09T13:13:58.269Z" }, - { url = "https://files.pythonhosted.org/packages/a3/6a/f1650af35821eaf09de398ec0bc2aefc8f211f0cda50204c9f1673741ba9/kiwisolver-1.5.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:d36ca54cb4c6c4686f7cbb7b817f66f5911c12ddb519450bbe86707155028f87", size = 987292, upload-time = "2026-03-09T13:13:59.871Z" }, - { url = "https://files.pythonhosted.org/packages/de/19/d7fb82984b9238115fe629c915007be608ebd23dc8629703d917dbfaffd4/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:38f4a703656f493b0ad185211ccfca7f0386120f022066b018eb5296d8613e23", size = 2227865, upload-time = "2026-03-09T13:14:01.401Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/b9/46b7f386589fd222dac9e9de9c956ce5bcefe2ee73b4e79891381dda8654/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3ac2360e93cb41be81121755c6462cff3beaa9967188c866e5fce5cf13170859", size = 2324369, upload-time = "2026-03-09T13:14:02.972Z" }, - { url = "https://files.pythonhosted.org/packages/92/8b/95e237cf3d9c642960153c769ddcbe278f182c8affb20cecc1cc983e7cc5/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c95cab08d1965db3d84a121f1c7ce7479bdd4072c9b3dafd8fecce48a2e6b902", size = 1977989, upload-time = "2026-03-09T13:14:04.503Z" }, - { url = "https://files.pythonhosted.org/packages/1b/95/980c9df53501892784997820136c01f62bc1865e31b82b9560f980c0e649/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fc20894c3d21194d8041a28b65622d5b86db786da6e3cfe73f0c762951a61167", size = 2491645, upload-time = "2026-03-09T13:14:06.106Z" }, - { url = "https://files.pythonhosted.org/packages/cb/32/900647fd0840abebe1561792c6b31e6a7c0e278fc3973d30572a965ca14c/kiwisolver-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7a32f72973f0f950c1920475d5c5ea3d971b81b6f0ec53b8d0a956cc965f22e0", size = 2295237, upload-time = "2026-03-09T13:14:08.891Z" }, - { url = "https://files.pythonhosted.org/packages/be/8a/be60e3bbcf513cc5a50f4a3e88e1dcecebb79c1ad607a7222877becaa101/kiwisolver-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bf3acf1419fa93064a4c2189ac0b58e3be7872bf6ee6177b0d4c63dc4cea276", size = 73573, upload-time = "2026-03-09T13:14:12.327Z" }, - { url = "https://files.pythonhosted.org/packages/4d/d2/64be2e429eb4fca7f7e1c52a91b12663aeaf25de3895e5cca0f47ef2a8d0/kiwisolver-1.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:fa8eb9ecdb7efb0b226acec134e0d709e87a909fa4971a54c0c4f6e88635484c", size = 64998, upload-time = "2026-03-09T13:14:13.469Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/69/ce68dd0c85755ae2de490bf015b62f2cea5f6b14ff00a463f9d0774449ff/kiwisolver-1.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:db485b3847d182b908b483b2ed133c66d88d49cacf98fd278fadafe11b4478d1", size = 125700, upload-time = "2026-03-09T13:14:14.636Z" }, - { url = "https://files.pythonhosted.org/packages/74/aa/937aac021cf9d4349990d47eb319309a51355ed1dbdc9c077cdc9224cb11/kiwisolver-1.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:be12f931839a3bdfe28b584db0e640a65a8bcbc24560ae3fdb025a449b3d754e", size = 67537, upload-time = "2026-03-09T13:14:15.808Z" }, - { url = "https://files.pythonhosted.org/packages/ee/20/3a87fbece2c40ad0f6f0aefa93542559159c5f99831d596050e8afae7a9f/kiwisolver-1.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:16b85d37c2cbb3253226d26e64663f755d88a03439a9c47df6246b35defbdfb7", size = 65514, upload-time = "2026-03-09T13:14:18.035Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7f/f943879cda9007c45e1f7dba216d705c3a18d6b35830e488b6c6a4e7cdf0/kiwisolver-1.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4432b835675f0ea7414aab3d37d119f7226d24869b7a829caeab49ebda407b0c", size = 1584848, upload-time = "2026-03-09T13:14:19.745Z" }, - { url = "https://files.pythonhosted.org/packages/37/f8/4d4f85cc1870c127c88d950913370dd76138482161cd07eabbc450deff01/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b0feb50971481a2cc44d94e88bdb02cdd497618252ae226b8eb1201b957e368", size = 1391542, upload-time = "2026-03-09T13:14:21.54Z" }, - { url = "https://files.pythonhosted.org/packages/04/0b/65dd2916c84d252b244bd405303220f729e7c17c9d7d33dca6feeff9ffc4/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:56fa888f10d0f367155e76ce849fa1166fc9730d13bd2d65a2aa13b6f5424489", size = 1404447, upload-time = "2026-03-09T13:14:23.205Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/5c/2606a373247babce9b1d056c03a04b65f3cf5290a8eac5d7bdead0a17e21/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:940dda65d5e764406b9fb92761cbf462e4e63f712ab60ed98f70552e496f3bf1", size = 1455918, upload-time = "2026-03-09T13:14:24.74Z" }, - { url = "https://files.pythonhosted.org/packages/d5/d1/c6078b5756670658e9192a2ef11e939c92918833d2745f85cd14a6004bdf/kiwisolver-1.5.0-cp313-cp313t-manylinux_2_39_riscv64.whl", hash = "sha256:89fc958c702ee9a745e4700378f5d23fddbc46ff89e8fdbf5395c24d5c1452a3", size = 1072856, upload-time = "2026-03-09T13:14:26.597Z" }, - { url = "https://files.pythonhosted.org/packages/cb/c8/7def6ddf16eb2b3741d8b172bdaa9af882b03c78e9b0772975408801fa63/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9027d773c4ff81487181a925945743413f6069634d0b122d0b37684ccf4f1e18", size = 2333580, upload-time = "2026-03-09T13:14:28.237Z" }, - { url = "https://files.pythonhosted.org/packages/9e/87/2ac1fce0eb1e616fcd3c35caa23e665e9b1948bb984f4764790924594128/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:5b233ea3e165e43e35dba1d2b8ecc21cf070b45b65ae17dd2747d2713d942021", size = 2423018, upload-time = "2026-03-09T13:14:30.018Z" }, - { url = "https://files.pythonhosted.org/packages/67/13/c6700ccc6cc218716bfcda4935e4b2997039869b4ad8a94f364c5a3b8e63/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ce9bf03dad3b46408c08649c6fbd6ca28a9fce0eb32fdfffa6775a13103b5310", size = 2062804, upload-time = "2026-03-09T13:14:32.888Z" }, - { url = "https://files.pythonhosted.org/packages/1b/bd/877056304626943ff0f1f44c08f584300c199b887cb3176cd7e34f1515f1/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:fc4d3f1fb9ca0ae9f97b095963bc6326f1dbfd3779d6679a1e016b9baaa153d3", size = 2597482, upload-time = "2026-03-09T13:14:34.971Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/19/c60626c47bf0f8ac5dcf72c6c98e266d714f2fbbfd50cf6dab5ede3aaa50/kiwisolver-1.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f443b4825c50a51ee68585522ab4a1d1257fac65896f282b4c6763337ac9f5d2", size = 2394328, upload-time = "2026-03-09T13:14:36.816Z" }, - { url = "https://files.pythonhosted.org/packages/47/84/6a6d5e5bb8273756c27b7d810d47f7ef2f1f9b9fd23c9ee9a3f8c75c9cef/kiwisolver-1.5.0-cp313-cp313t-win_arm64.whl", hash = "sha256:893ff3a711d1b515ba9da14ee090519bad4610ed1962fbe298a434e8c5f8db53", size = 68410, upload-time = "2026-03-09T13:14:38.695Z" }, - { url = "https://files.pythonhosted.org/packages/e4/d7/060f45052f2a01ad5762c8fdecd6d7a752b43400dc29ff75cd47225a40fd/kiwisolver-1.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:8df31fe574b8b3993cc61764f40941111b25c2d9fea13d3ce24a49907cd2d615", size = 123231, upload-time = "2026-03-09T13:14:41.323Z" }, - { url = "https://files.pythonhosted.org/packages/c2/a7/78da680eadd06ff35edef6ef68a1ad273bad3e2a0936c9a885103230aece/kiwisolver-1.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:1d49a49ac4cbfb7c1375301cd1ec90169dfeae55ff84710d782260ce77a75a02", size = 66489, upload-time = "2026-03-09T13:14:42.534Z" }, - { url = "https://files.pythonhosted.org/packages/49/b2/97980f3ad4fae37dd7fe31626e2bf75fbf8bdf5d303950ec1fab39a12da8/kiwisolver-1.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0cbe94b69b819209a62cb27bdfa5dc2a8977d8de2f89dfd97ba4f53ed3af754e", size = 64063, upload-time = "2026-03-09T13:14:44.759Z" }, - { url = "https://files.pythonhosted.org/packages/e7/f9/b06c934a6aa8bc91f566bd2a214fd04c30506c2d9e2b6b171953216a65b6/kiwisolver-1.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:80aa065ffd378ff784822a6d7c3212f2d5f5e9c3589614b5c228b311fd3063ac", size = 1475913, upload-time = "2026-03-09T13:14:46.247Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/f0/f768ae564a710135630672981231320bc403cf9152b5596ec5289de0f106/kiwisolver-1.5.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e7f886f47ab881692f278ae901039a234e4025a68e6dfab514263a0b1c4ae05", size = 1282782, upload-time = "2026-03-09T13:14:48.458Z" }, - { url = "https://files.pythonhosted.org/packages/e2/9f/1de7aad00697325f05238a5f2eafbd487fb637cc27a558b5367a5f37fb7f/kiwisolver-1.5.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5060731cc3ed12ca3a8b57acd4aeca5bbc2f49216dd0bec1650a1acd89486bcd", size = 1300815, upload-time = "2026-03-09T13:14:50.721Z" }, - { url = "https://files.pythonhosted.org/packages/5a/c2/297f25141d2e468e0ce7f7a7b92e0cf8918143a0cbd3422c1ad627e85a06/kiwisolver-1.5.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a4aa69609f40fce3cbc3f87b2061f042eee32f94b8f11db707b66a26461591a", size = 1347925, upload-time = "2026-03-09T13:14:52.304Z" }, - { url = "https://files.pythonhosted.org/packages/b9/d3/f4c73a02eb41520c47610207b21afa8cdd18fdbf64ffd94674ae21c4812d/kiwisolver-1.5.0-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:d168fda2dbff7b9b5f38e693182d792a938c31db4dac3a80a4888de603c99554", size = 991322, upload-time = "2026-03-09T13:14:54.637Z" }, - { url = "https://files.pythonhosted.org/packages/7b/46/d3f2efef7732fcda98d22bf4ad5d3d71d545167a852ca710a494f4c15343/kiwisolver-1.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:413b820229730d358efd838ecbab79902fe97094565fdc80ddb6b0a18c18a581", size = 2232857, upload-time = "2026-03-09T13:14:56.471Z" }, - { url = "https://files.pythonhosted.org/packages/3f/ec/2d9756bf2b6d26ae4349b8d3662fb3993f16d80c1f971c179ce862b9dbae/kiwisolver-1.5.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:5124d1ea754509b09e53738ec185584cc609aae4a3b510aaf4ed6aa047ef9303", size = 2329376, upload-time = "2026-03-09T13:14:58.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/8f/9f/876a0a0f2260f1bde92e002b3019a5fabc35e0939c7d945e0fa66185eb20/kiwisolver-1.5.0-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e4415a8db000bf49a6dd1c478bf70062eaacff0f462b92b0ba68791a905861f9", size = 1982549, upload-time = "2026-03-09T13:14:59.668Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4f/ba3624dfac23a64d54ac4179832860cb537c1b0af06024936e82ca4154a0/kiwisolver-1.5.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d618fd27420381a4f6044faa71f46d8bfd911bd077c555f7138ed88729bfbe79", size = 2494680, upload-time = "2026-03-09T13:15:01.364Z" }, - { url = "https://files.pythonhosted.org/packages/39/b7/97716b190ab98911b20d10bf92eca469121ec483b8ce0edd314f51bc85af/kiwisolver-1.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5092eb5b1172947f57d6ea7d89b2f29650414e4293c47707eb499ec07a0ac796", size = 2297905, upload-time = "2026-03-09T13:15:03.925Z" }, - { url = "https://files.pythonhosted.org/packages/a3/36/4e551e8aa55c9188bca9abb5096805edbf7431072b76e2298e34fd3a3008/kiwisolver-1.5.0-cp314-cp314-win_amd64.whl", hash = "sha256:d76e2d8c75051d58177e762164d2e9ab92886534e3a12e795f103524f221dd8e", size = 75086, upload-time = "2026-03-09T13:15:07.775Z" }, - { url = "https://files.pythonhosted.org/packages/70/15/9b90f7df0e31a003c71649cf66ef61c3c1b862f48c81007fa2383c8bd8d7/kiwisolver-1.5.0-cp314-cp314-win_arm64.whl", hash = "sha256:fa6248cd194edff41d7ea9425ced8ca3a6f838bfb295f6f1d6e6bb694a8518df", size = 66577, upload-time = "2026-03-09T13:15:09.139Z" }, - { url = "https://files.pythonhosted.org/packages/17/01/7dc8c5443ff42b38e72731643ed7cf1ed9bf01691ae5cdca98501999ed83/kiwisolver-1.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:d1ffeb80b5676463d7a7d56acbe8e37a20ce725570e09549fe738e02ca6b7e1e", size = 125794, upload-time = "2026-03-09T13:15:10.525Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/8a/b4ebe46ebaac6a303417fab10c2e165c557ddaff558f9699d302b256bc53/kiwisolver-1.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bc4d8e252f532ab46a1de9349e2d27b91fce46736a9eedaa37beaca66f574ed4", size = 67646, upload-time = "2026-03-09T13:15:12.016Z" }, - { url = "https://files.pythonhosted.org/packages/60/35/10a844afc5f19d6f567359bf4789e26661755a2f36200d5d1ed8ad0126e5/kiwisolver-1.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6783e069732715ad0c3ce96dbf21dbc2235ab0593f2baf6338101f70371f4028", size = 65511, upload-time = "2026-03-09T13:15:13.311Z" }, - { url = "https://files.pythonhosted.org/packages/f8/8a/685b297052dd041dcebce8e8787b58923b6e78acc6115a0dc9189011c44b/kiwisolver-1.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e7c4c09a490dc4d4a7f8cbee56c606a320f9dc28cf92a7157a39d1ce7676a657", size = 1584858, upload-time = "2026-03-09T13:15:15.103Z" }, - { url = "https://files.pythonhosted.org/packages/9e/80/04865e3d4638ac5bddec28908916df4a3075b8c6cc101786a96803188b96/kiwisolver-1.5.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a075bd7bd19c70cf67c8badfa36cf7c5d8de3c9ddb8420c51e10d9c50e94920", size = 1392539, upload-time = "2026-03-09T13:15:16.661Z" }, - { url = "https://files.pythonhosted.org/packages/ba/01/77a19cacc0893fa13fafa46d1bba06fb4dc2360b3292baf4b56d8e067b24/kiwisolver-1.5.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bdd3e53429ff02aa319ba59dfe4ceeec345bf46cf180ec2cf6fd5b942e7975e9", size = 1405310, upload-time = "2026-03-09T13:15:18.229Z" }, - { url = "https://files.pythonhosted.org/packages/53/39/bcaf5d0cca50e604cfa9b4e3ae1d64b50ca1ae5b754122396084599ef903/kiwisolver-1.5.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cdcb35dc9d807259c981a85531048ede628eabcffb3239adf3d17463518992d", size = 1456244, upload-time = "2026-03-09T13:15:20.444Z" }, - { url = 
"https://files.pythonhosted.org/packages/d0/7a/72c187abc6975f6978c3e39b7cf67aeb8b3c0a8f9790aa7fd412855e9e1f/kiwisolver-1.5.0-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:70d593af6a6ca332d1df73d519fddb5148edb15cd90d5f0155e3746a6d4fcc65", size = 1073154, upload-time = "2026-03-09T13:15:22.039Z" }, - { url = "https://files.pythonhosted.org/packages/c7/ca/cf5b25783ebbd59143b4371ed0c8428a278abe68d6d0104b01865b1bbd0f/kiwisolver-1.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:377815a8616074cabbf3f53354e1d040c35815a134e01d7614b7692e4bf8acfa", size = 2334377, upload-time = "2026-03-09T13:15:23.741Z" }, - { url = "https://files.pythonhosted.org/packages/4a/e5/b1f492adc516796e88751282276745340e2a72dcd0d36cf7173e0daf3210/kiwisolver-1.5.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0255a027391d52944eae1dbb5d4cc5903f57092f3674e8e544cdd2622826b3f0", size = 2425288, upload-time = "2026-03-09T13:15:25.789Z" }, - { url = "https://files.pythonhosted.org/packages/e6/e5/9b21fbe91a61b8f409d74a26498706e97a48008bfcd1864373d32a6ba31c/kiwisolver-1.5.0-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:012b1eb16e28718fa782b5e61dc6f2da1f0792ca73bd05d54de6cb9561665fc9", size = 2063158, upload-time = "2026-03-09T13:15:27.63Z" }, - { url = "https://files.pythonhosted.org/packages/b1/02/83f47986138310f95ea95531f851b2a62227c11cbc3e690ae1374fe49f0f/kiwisolver-1.5.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0e3aafb33aed7479377e5e9a82e9d4bf87063741fc99fc7ae48b0f16e32bdd6f", size = 2597260, upload-time = "2026-03-09T13:15:29.421Z" }, - { url = "https://files.pythonhosted.org/packages/07/18/43a5f24608d8c313dd189cf838c8e68d75b115567c6279de7796197cfb6a/kiwisolver-1.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e7a116ae737f0000343218c4edf5bd45893bfeaff0993c0b215d7124c9f77646", size = 2394403, upload-time = "2026-03-09T13:15:31.517Z" }, - { url = 
"https://files.pythonhosted.org/packages/3b/b5/98222136d839b8afabcaa943b09bd05888c2d36355b7e448550211d1fca4/kiwisolver-1.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1dd9b0b119a350976a6d781e7278ec7aca0b201e1a9e2d23d9804afecb6ca681", size = 79687, upload-time = "2026-03-09T13:15:33.204Z" }, - { url = "https://files.pythonhosted.org/packages/99/a2/ca7dc962848040befed12732dff6acae7fb3c4f6fc4272b3f6c9a30b8713/kiwisolver-1.5.0-cp314-cp314t-win_arm64.whl", hash = "sha256:58f812017cd2985c21fbffb4864d59174d4903dd66fa23815e74bbc7a0e2dd57", size = 70032, upload-time = "2026-03-09T13:15:34.411Z" }, - { url = "https://files.pythonhosted.org/packages/1c/fa/2910df836372d8761bb6eff7d8bdcb1613b5c2e03f260efe7abe34d388a7/kiwisolver-1.5.0-graalpy312-graalpy250_312_native-macosx_10_13_x86_64.whl", hash = "sha256:5ae8e62c147495b01a0f4765c878e9bfdf843412446a247e28df59936e99e797", size = 130262, upload-time = "2026-03-09T13:15:35.629Z" }, - { url = "https://files.pythonhosted.org/packages/0f/41/c5f71f9f00aabcc71fee8b7475e3f64747282580c2fe748961ba29b18385/kiwisolver-1.5.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:f6764a4ccab3078db14a632420930f6186058750df066b8ea2a7106df91d3203", size = 138036, upload-time = "2026-03-09T13:15:36.894Z" }, - { url = "https://files.pythonhosted.org/packages/fa/06/7399a607f434119c6e1fdc8ec89a8d51ccccadf3341dee4ead6bd14caaf5/kiwisolver-1.5.0-graalpy312-graalpy250_312_native-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c31c13da98624f957b0fb1b5bae5383b2333c2c3f6793d9825dd5ce79b525cb7", size = 194295, upload-time = "2026-03-09T13:15:38.22Z" }, - { url = "https://files.pythonhosted.org/packages/b5/91/53255615acd2a1eaca307ede3c90eb550bae9c94581f8c00081b6b1c8f44/kiwisolver-1.5.0-graalpy312-graalpy250_312_native-win_amd64.whl", hash = "sha256:1f1489f769582498610e015a8ef2d36f28f505ab3096d0e16b4858a9ec214f57", size = 75987, upload-time = "2026-03-09T13:15:39.65Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/eb/5fcbbbf9a0e2c3a35effb88831a483345326bbc3a030a3b5b69aee647f84/kiwisolver-1.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ec4c85dc4b687c7f7f15f553ff26a98bfe8c58f5f7f0ac8905f0ba4c7be60232", size = 59532, upload-time = "2026-03-09T13:15:47.047Z" }, - { url = "https://files.pythonhosted.org/packages/c3/9b/e17104555bb4db148fd52327feea1e96be4b88e8e008b029002c281a21ab/kiwisolver-1.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:12e91c215a96e39f57989c8912ae761286ac5a9584d04030ceb3368a357f017a", size = 57420, upload-time = "2026-03-09T13:15:48.199Z" }, - { url = "https://files.pythonhosted.org/packages/48/44/2b5b95b7aa39fb2d8d9d956e0f3d5d45aef2ae1d942d4c3ffac2f9cfed1a/kiwisolver-1.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:be4a51a55833dc29ab5d7503e7bcb3b3af3402d266018137127450005cdfe737", size = 79892, upload-time = "2026-03-09T13:15:49.694Z" }, - { url = "https://files.pythonhosted.org/packages/52/7d/7157f9bba6b455cfb4632ed411e199fc8b8977642c2b12082e1bd9e6d173/kiwisolver-1.5.0-pp311-pypy311_pp73-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:daae526907e262de627d8f70058a0f64acc9e2641c164c99c8f594b34a799a16", size = 77603, upload-time = "2026-03-09T13:15:50.945Z" }, - { url = "https://files.pythonhosted.org/packages/0a/dd/8050c947d435c8d4bc94e3252f4d8bb8a76cfb424f043a8680be637a57f1/kiwisolver-1.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:59cd8683f575d96df5bb48f6add94afc055012c29e28124fcae2b63661b9efb1", size = 73558, upload-time = "2026-03-09T13:15:52.112Z" }, -] - [[package]] name = "lark-oapi" version = "1.5.3" @@ -2784,42 +2227,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/ff/2ece5d735ebfa2af600a53176f2636ae47af2bf934e08effab64f0d1e047/lark_oapi-1.5.3-py3-none-any.whl", hash = "sha256:fda6b32bb38d21b6bdaae94979c600b94c7c521e985adade63a54e4b3e20cc36", size = 6993016, upload-time = 
"2026-01-27T08:21:49.307Z" }, ] -[[package]] -name = "latex2sympy2-extended" -version = "1.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "antlr4-python3-runtime" }, - { name = "sympy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/30/75/456da2da05f6380ea96e6ea804ab2c03e41fc3ed80052307fe8efe6ea20e/latex2sympy2_extended-1.11.0.tar.gz", hash = "sha256:9695657c81b50abba2636638638618db59f4663ed2a4a12d62cef74a40e28fec", size = 207023, upload-time = "2026-01-10T01:43:21.319Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/61/f75cd1fa54d8434276126034aed54dd120747de9a8fa013cdd79545ccbeb/latex2sympy2_extended-1.11.0-py3-none-any.whl", hash = "sha256:aebb77d52ce269e25028e4bea89ddb14d242ba36bcf7b636496fb5fd9728d234", size = 209050, upload-time = "2026-01-10T01:43:19.458Z" }, -] - -[[package]] -name = "litellm" -version = "1.81.15" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp", marker = "python_full_version >= '3.12'" }, - { name = "click", marker = "python_full_version >= '3.12'" }, - { name = "fastuuid", marker = "python_full_version >= '3.12'" }, - { name = "httpx", marker = "python_full_version >= '3.12'" }, - { name = "importlib-metadata", marker = "python_full_version >= '3.12'" }, - { name = "jinja2", marker = "python_full_version >= '3.12'" }, - { name = "jsonschema", marker = "python_full_version >= '3.12'" }, - { name = "openai", marker = "python_full_version >= '3.12'" }, - { name = "pydantic", marker = "python_full_version >= '3.12'" }, - { name = "python-dotenv", marker = "python_full_version >= '3.12'" }, - { name = "tiktoken", marker = "python_full_version >= '3.12'" }, - { name = "tokenizers", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/70/0c/62a0fdc5adae6d205338f9239175aa6a93818e58b75cf000a9c7214a3d9f/litellm-1.81.15.tar.gz", hash = 
"sha256:a8a6277a53280762051c5818ebc76dd5f036368b9426c6f21795ae7f1ac6ebdc", size = 16597039, upload-time = "2026-02-24T06:52:50.892Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/fd/da11826dda0d332e360b9ead6c0c992d612ecb85b00df494823843cfcda3/litellm-1.81.15-py3-none-any.whl", hash = "sha256:2fa253658702509ce09fe0e172e5a47baaadf697fb0f784c7fd4ff665ae76ae1", size = 14682123, upload-time = "2026-02-24T06:52:48.084Z" }, -] - [[package]] name = "markdown" version = "3.10.2" @@ -2924,82 +2331,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/70/bb89f807a6a6704bdc4d6f850d5d32954f6c1965e3248e31455defdf2f30/marshmallow-4.2.2-py3-none-any.whl", hash = "sha256:084a9466111b7ec7183ca3a65aed758739af919fedc5ebdab60fb39d6b4dc121", size = 48454, upload-time = "2026-02-04T15:47:02.013Z" }, ] -[[package]] -name = "math-verify" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "latex2sympy2-extended" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4f/12/b8d13b581e110ac2f724a2351a8361a70fa36d057eb945d6379e8747c256/math_verify-0.9.0.tar.gz", hash = "sha256:45ac6c61344ba056b9e99a660a4bc8d044ed408f730aed68c60435aa5eec4645", size = 60329, upload-time = "2026-01-10T01:48:33.056Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/62/76/6b4969bccc842b6567f7e6ee015684b9428a9b7fcbdf479e73716f43597f/math_verify-0.9.0-py3-none-any.whl", hash = "sha256:3703e7c4885354027fa84409d762a596a2906d1fd4deb78361876bd905a76194", size = 29967, upload-time = "2026-01-10T01:48:31.674Z" }, -] - -[[package]] -name = "matplotlib" -version = "3.10.8" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "contourpy", marker = "python_full_version >= '3.12'" }, - { name = "cycler", marker = "python_full_version >= '3.12'" }, - { name = "fonttools", marker = "python_full_version >= '3.12'" }, - { name = "kiwisolver", marker = "python_full_version >= '3.12'" }, - { name = 
"numpy", marker = "python_full_version >= '3.12'" }, - { name = "packaging", marker = "python_full_version >= '3.12'" }, - { name = "pillow", marker = "python_full_version >= '3.12'" }, - { name = "pyparsing", marker = "python_full_version >= '3.12'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8a/76/d3c6e3a13fe484ebe7718d14e269c9569c4eb0020a968a327acb3b9a8fe6/matplotlib-3.10.8.tar.gz", hash = "sha256:2299372c19d56bcd35cf05a2738308758d32b9eaed2371898d8f5bd33f084aa3", size = 34806269, upload-time = "2025-12-10T22:56:51.155Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/86/de7e3a1cdcfc941483af70609edc06b83e7c8a0e0dc9ac325200a3f4d220/matplotlib-3.10.8-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6be43b667360fef5c754dda5d25a32e6307a03c204f3c0fc5468b78fa87b4160", size = 8251215, upload-time = "2025-12-10T22:55:16.175Z" }, - { url = "https://files.pythonhosted.org/packages/fd/14/baad3222f424b19ce6ad243c71de1ad9ec6b2e4eb1e458a48fdc6d120401/matplotlib-3.10.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a2b336e2d91a3d7006864e0990c83b216fcdca64b5a6484912902cef87313d78", size = 8139625, upload-time = "2025-12-10T22:55:17.712Z" }, - { url = "https://files.pythonhosted.org/packages/8f/a0/7024215e95d456de5883e6732e708d8187d9753a21d32f8ddb3befc0c445/matplotlib-3.10.8-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:efb30e3baaea72ce5928e32bab719ab4770099079d66726a62b11b1ef7273be4", size = 8712614, upload-time = "2025-12-10T22:55:20.8Z" }, - { url = "https://files.pythonhosted.org/packages/5a/f4/b8347351da9a5b3f41e26cf547252d861f685c6867d179a7c9d60ad50189/matplotlib-3.10.8-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d56a1efd5bfd61486c8bc968fa18734464556f0fb8e51690f4ac25d85cbbbbc2", size = 9540997, upload-time = "2025-12-10T22:55:23.258Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/c0/c7b914e297efe0bc36917bf216b2acb91044b91e930e878ae12981e461e5/matplotlib-3.10.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:238b7ce5717600615c895050239ec955d91f321c209dd110db988500558e70d6", size = 9596825, upload-time = "2025-12-10T22:55:25.217Z" }, - { url = "https://files.pythonhosted.org/packages/6f/d3/a4bbc01c237ab710a1f22b4da72f4ff6d77eb4c7735ea9811a94ae239067/matplotlib-3.10.8-cp311-cp311-win_amd64.whl", hash = "sha256:18821ace09c763ec93aef5eeff087ee493a24051936d7b9ebcad9662f66501f9", size = 8135090, upload-time = "2025-12-10T22:55:27.162Z" }, - { url = "https://files.pythonhosted.org/packages/89/dd/a0b6588f102beab33ca6f5218b31725216577b2a24172f327eaf6417d5c9/matplotlib-3.10.8-cp311-cp311-win_arm64.whl", hash = "sha256:bab485bcf8b1c7d2060b4fcb6fc368a9e6f4cd754c9c2fea281f4be21df394a2", size = 8012377, upload-time = "2025-12-10T22:55:29.185Z" }, - { url = "https://files.pythonhosted.org/packages/9e/67/f997cdcbb514012eb0d10cd2b4b332667997fb5ebe26b8d41d04962fa0e6/matplotlib-3.10.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:64fcc24778ca0404ce0cb7b6b77ae1f4c7231cdd60e6778f999ee05cbd581b9a", size = 8260453, upload-time = "2025-12-10T22:55:30.709Z" }, - { url = "https://files.pythonhosted.org/packages/7e/65/07d5f5c7f7c994f12c768708bd2e17a4f01a2b0f44a1c9eccad872433e2e/matplotlib-3.10.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9a5ca4ac220a0cdd1ba6bcba3608547117d30468fefce49bb26f55c1a3d5c58", size = 8148321, upload-time = "2025-12-10T22:55:33.265Z" }, - { url = "https://files.pythonhosted.org/packages/3e/f3/c5195b1ae57ef85339fd7285dfb603b22c8b4e79114bae5f4f0fcf688677/matplotlib-3.10.8-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3ab4aabc72de4ff77b3ec33a6d78a68227bf1123465887f9905ba79184a1cc04", size = 8716944, upload-time = "2025-12-10T22:55:34.922Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/f9/7638f5cc82ec8a7aa005de48622eecc3ed7c9854b96ba15bd76b7fd27574/matplotlib-3.10.8-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24d50994d8c5816ddc35411e50a86ab05f575e2530c02752e02538122613371f", size = 9550099, upload-time = "2025-12-10T22:55:36.789Z" }, - { url = "https://files.pythonhosted.org/packages/57/61/78cd5920d35b29fd2a0fe894de8adf672ff52939d2e9b43cb83cd5ce1bc7/matplotlib-3.10.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:99eefd13c0dc3b3c1b4d561c1169e65fe47aab7b8158754d7c084088e2329466", size = 9613040, upload-time = "2025-12-10T22:55:38.715Z" }, - { url = "https://files.pythonhosted.org/packages/30/4e/c10f171b6e2f44d9e3a2b96efa38b1677439d79c99357600a62cc1e9594e/matplotlib-3.10.8-cp312-cp312-win_amd64.whl", hash = "sha256:dd80ecb295460a5d9d260df63c43f4afbdd832d725a531f008dad1664f458adf", size = 8142717, upload-time = "2025-12-10T22:55:41.103Z" }, - { url = "https://files.pythonhosted.org/packages/f1/76/934db220026b5fef85f45d51a738b91dea7d70207581063cd9bd8fafcf74/matplotlib-3.10.8-cp312-cp312-win_arm64.whl", hash = "sha256:3c624e43ed56313651bc18a47f838b60d7b8032ed348911c54906b130b20071b", size = 8012751, upload-time = "2025-12-10T22:55:42.684Z" }, - { url = "https://files.pythonhosted.org/packages/3d/b9/15fd5541ef4f5b9a17eefd379356cf12175fe577424e7b1d80676516031a/matplotlib-3.10.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3f2e409836d7f5ac2f1c013110a4d50b9f7edc26328c108915f9075d7d7a91b6", size = 8261076, upload-time = "2025-12-10T22:55:44.648Z" }, - { url = "https://files.pythonhosted.org/packages/8d/a0/2ba3473c1b66b9c74dc7107c67e9008cb1782edbe896d4c899d39ae9cf78/matplotlib-3.10.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:56271f3dac49a88d7fca5060f004d9d22b865f743a12a23b1e937a0be4818ee1", size = 8148794, upload-time = "2025-12-10T22:55:46.252Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/97/a471f1c3eb1fd6f6c24a31a5858f443891d5127e63a7788678d14e249aea/matplotlib-3.10.8-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a0a7f52498f72f13d4a25ea70f35f4cb60642b466cbb0a9be951b5bc3f45a486", size = 8718474, upload-time = "2025-12-10T22:55:47.864Z" }, - { url = "https://files.pythonhosted.org/packages/01/be/cd478f4b66f48256f42927d0acbcd63a26a893136456cd079c0cc24fbabf/matplotlib-3.10.8-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:646d95230efb9ca614a7a594d4fcacde0ac61d25e37dd51710b36477594963ce", size = 9549637, upload-time = "2025-12-10T22:55:50.048Z" }, - { url = "https://files.pythonhosted.org/packages/5d/7c/8dc289776eae5109e268c4fb92baf870678dc048a25d4ac903683b86d5bf/matplotlib-3.10.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f89c151aab2e2e23cb3fe0acad1e8b82841fd265379c4cecd0f3fcb34c15e0f6", size = 9613678, upload-time = "2025-12-10T22:55:52.21Z" }, - { url = "https://files.pythonhosted.org/packages/64/40/37612487cc8a437d4dd261b32ca21fe2d79510fe74af74e1f42becb1bdb8/matplotlib-3.10.8-cp313-cp313-win_amd64.whl", hash = "sha256:e8ea3e2d4066083e264e75c829078f9e149fa119d27e19acd503de65e0b13149", size = 8142686, upload-time = "2025-12-10T22:55:54.253Z" }, - { url = "https://files.pythonhosted.org/packages/66/52/8d8a8730e968185514680c2a6625943f70269509c3dcfc0dcf7d75928cb8/matplotlib-3.10.8-cp313-cp313-win_arm64.whl", hash = "sha256:c108a1d6fa78a50646029cb6d49808ff0fc1330fda87fa6f6250c6b5369b6645", size = 8012917, upload-time = "2025-12-10T22:55:56.268Z" }, - { url = "https://files.pythonhosted.org/packages/b5/27/51fe26e1062f298af5ef66343d8ef460e090a27fea73036c76c35821df04/matplotlib-3.10.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:ad3d9833a64cf48cc4300f2b406c3d0f4f4724a91c0bd5640678a6ba7c102077", size = 8305679, upload-time = "2025-12-10T22:55:57.856Z" }, - { url = 
"https://files.pythonhosted.org/packages/2c/1e/4de865bc591ac8e3062e835f42dd7fe7a93168d519557837f0e37513f629/matplotlib-3.10.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:eb3823f11823deade26ce3b9f40dcb4a213da7a670013929f31d5f5ed1055b22", size = 8198336, upload-time = "2025-12-10T22:55:59.371Z" }, - { url = "https://files.pythonhosted.org/packages/c6/cb/2f7b6e75fb4dce87ef91f60cac4f6e34f4c145ab036a22318ec837971300/matplotlib-3.10.8-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d9050fee89a89ed57b4fb2c1bfac9a3d0c57a0d55aed95949eedbc42070fea39", size = 8731653, upload-time = "2025-12-10T22:56:01.032Z" }, - { url = "https://files.pythonhosted.org/packages/46/b3/bd9c57d6ba670a37ab31fb87ec3e8691b947134b201f881665b28cc039ff/matplotlib-3.10.8-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b44d07310e404ba95f8c25aa5536f154c0a8ec473303535949e52eb71d0a1565", size = 9561356, upload-time = "2025-12-10T22:56:02.95Z" }, - { url = "https://files.pythonhosted.org/packages/c0/3d/8b94a481456dfc9dfe6e39e93b5ab376e50998cddfd23f4ae3b431708f16/matplotlib-3.10.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0a33deb84c15ede243aead39f77e990469fff93ad1521163305095b77b72ce4a", size = 9614000, upload-time = "2025-12-10T22:56:05.411Z" }, - { url = "https://files.pythonhosted.org/packages/bd/cd/bc06149fe5585ba800b189a6a654a75f1f127e8aab02fd2be10df7fa500c/matplotlib-3.10.8-cp313-cp313t-win_amd64.whl", hash = "sha256:3a48a78d2786784cc2413e57397981fb45c79e968d99656706018d6e62e57958", size = 8220043, upload-time = "2025-12-10T22:56:07.551Z" }, - { url = "https://files.pythonhosted.org/packages/e3/de/b22cf255abec916562cc04eef457c13e58a1990048de0c0c3604d082355e/matplotlib-3.10.8-cp313-cp313t-win_arm64.whl", hash = "sha256:15d30132718972c2c074cd14638c7f4592bd98719e2308bccea40e0538bc0cb5", size = 8062075, upload-time = "2025-12-10T22:56:09.178Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/43/9c0ff7a2f11615e516c3b058e1e6e8f9614ddeca53faca06da267c48345d/matplotlib-3.10.8-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b53285e65d4fa4c86399979e956235deb900be5baa7fc1218ea67fbfaeaadd6f", size = 8262481, upload-time = "2025-12-10T22:56:10.885Z" }, - { url = "https://files.pythonhosted.org/packages/6f/ca/e8ae28649fcdf039fda5ef554b40a95f50592a3c47e6f7270c9561c12b07/matplotlib-3.10.8-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:32f8dce744be5569bebe789e46727946041199030db8aeb2954d26013a0eb26b", size = 8151473, upload-time = "2025-12-10T22:56:12.377Z" }, - { url = "https://files.pythonhosted.org/packages/f1/6f/009d129ae70b75e88cbe7e503a12a4c0670e08ed748a902c2568909e9eb5/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cf267add95b1c88300d96ca837833d4112756045364f5c734a2276038dae27d", size = 9553896, upload-time = "2025-12-10T22:56:14.432Z" }, - { url = "https://files.pythonhosted.org/packages/f5/26/4221a741eb97967bc1fd5e4c52b9aa5a91b2f4ec05b59f6def4d820f9df9/matplotlib-3.10.8-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2cf5bd12cecf46908f286d7838b2abc6c91cda506c0445b8223a7c19a00df008", size = 9824193, upload-time = "2025-12-10T22:56:16.29Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f3/3abf75f38605772cf48a9daf5821cd4f563472f38b4b828c6fba6fa6d06e/matplotlib-3.10.8-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:41703cc95688f2516b480f7f339d8851a6035f18e100ee6a32bc0b8536a12a9c", size = 9615444, upload-time = "2025-12-10T22:56:18.155Z" }, - { url = "https://files.pythonhosted.org/packages/93/a5/de89ac80f10b8dc615807ee1133cd99ac74082581196d4d9590bea10690d/matplotlib-3.10.8-cp314-cp314-win_amd64.whl", hash = "sha256:83d282364ea9f3e52363da262ce32a09dfe241e4080dcedda3c0db059d3c1f11", size = 8272719, upload-time = "2025-12-10T22:56:20.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/69/ce/b006495c19ccc0a137b48083168a37bd056392dee02f87dba0472f2797fe/matplotlib-3.10.8-cp314-cp314-win_arm64.whl", hash = "sha256:2c1998e92cd5999e295a731bcb2911c75f597d937341f3030cc24ef2733d78a8", size = 8144205, upload-time = "2025-12-10T22:56:22.239Z" }, - { url = "https://files.pythonhosted.org/packages/68/d9/b31116a3a855bd313c6fcdb7226926d59b041f26061c6c5b1be66a08c826/matplotlib-3.10.8-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b5a2b97dbdc7d4f353ebf343744f1d1f1cca8aa8bfddb4262fcf4306c3761d50", size = 8305785, upload-time = "2025-12-10T22:56:24.218Z" }, - { url = "https://files.pythonhosted.org/packages/1e/90/6effe8103f0272685767ba5f094f453784057072f49b393e3ea178fe70a5/matplotlib-3.10.8-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:3f5c3e4da343bba819f0234186b9004faba952cc420fbc522dc4e103c1985908", size = 8198361, upload-time = "2025-12-10T22:56:26.787Z" }, - { url = "https://files.pythonhosted.org/packages/d7/65/a73188711bea603615fc0baecca1061429ac16940e2385433cc778a9d8e7/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f62550b9a30afde8c1c3ae450e5eb547d579dd69b25c2fc7a1c67f934c1717a", size = 9561357, upload-time = "2025-12-10T22:56:28.953Z" }, - { url = "https://files.pythonhosted.org/packages/f4/3d/b5c5d5d5be8ce63292567f0e2c43dde9953d3ed86ac2de0a72e93c8f07a1/matplotlib-3.10.8-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:495672de149445ec1b772ff2c9ede9b769e3cb4f0d0aa7fa730d7f59e2d4e1c1", size = 9823610, upload-time = "2025-12-10T22:56:31.455Z" }, - { url = "https://files.pythonhosted.org/packages/4d/4b/e7beb6bbd49f6bae727a12b270a2654d13c397576d25bd6786e47033300f/matplotlib-3.10.8-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:595ba4d8fe983b88f0eec8c26a241e16d6376fe1979086232f481f8f3f67494c", size = 9614011, upload-time = "2025-12-10T22:56:33.85Z" }, - { url = 
"https://files.pythonhosted.org/packages/7c/e6/76f2813d31f032e65f6f797e3f2f6e4aab95b65015924b1c51370395c28a/matplotlib-3.10.8-cp314-cp314t-win_amd64.whl", hash = "sha256:25d380fe8b1dc32cf8f0b1b448470a77afb195438bafdf1d858bfb876f3edf7b", size = 8362801, upload-time = "2025-12-10T22:56:36.107Z" }, - { url = "https://files.pythonhosted.org/packages/5d/49/d651878698a0b67f23aa28e17f45a6d6dd3d3f933fa29087fa4ce5947b5a/matplotlib-3.10.8-cp314-cp314t-win_arm64.whl", hash = "sha256:113bb52413ea508ce954a02c10ffd0d565f9c3bc7f2eddc27dfe1731e71c7b5f", size = 8192560, upload-time = "2025-12-10T22:56:38.008Z" }, - { url = "https://files.pythonhosted.org/packages/04/30/3afaa31c757f34b7725ab9d2ba8b48b5e89c2019c003e7d0ead143aabc5a/matplotlib-3.10.8-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:6da7c2ce169267d0d066adcf63758f0604aa6c3eebf67458930f9d9b79ad1db1", size = 8249198, upload-time = "2025-12-10T22:56:45.584Z" }, - { url = "https://files.pythonhosted.org/packages/48/2f/6334aec331f57485a642a7c8be03cb286f29111ae71c46c38b363230063c/matplotlib-3.10.8-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9153c3292705be9f9c64498a8872118540c3f4123d1a1c840172edf262c8be4a", size = 8136817, upload-time = "2025-12-10T22:56:47.339Z" }, - { url = "https://files.pythonhosted.org/packages/73/e4/6d6f14b2a759c622f191b2d67e9075a3f56aaccb3be4bb9bb6890030d0a0/matplotlib-3.10.8-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ae029229a57cd1e8fe542485f27e7ca7b23aa9e8944ddb4985d0bc444f1eca2", size = 8713867, upload-time = "2025-12-10T22:56:48.954Z" }, -] - [[package]] name = "mautrix" version = "0.21.0" @@ -3260,35 +2591,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, ] -[[package]] -name = 
"multiprocess" -version = "0.70.19" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "dill" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/aa/714635c727dbfc251139226fa4eaf1b07f00dc12d9cd2eb25f931adaf873/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1bbf1b69af1cf64cd05f65337d9215b88079ec819cd0ea7bac4dab84e162efe7", size = 144743, upload-time = "2026-01-19T06:47:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/0f/e1/155f6abf5e6b5d9cef29b6d0167c180846157a4aca9b9bee1a217f67c959/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:5be9ec7f0c1c49a4f4a6fd20d5dda4aeabc2d39a50f4ad53720f1cd02b3a7c2e", size = 144738, upload-time = "2026-01-19T06:47:26.636Z" }, - { url = "https://files.pythonhosted.org/packages/af/cb/f421c2869d75750a4f32301cc20c4b63fab6376e9a75c8e5e655bdeb3d9b/multiprocess-0.70.19-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1c3dce098845a0db43b32a0b76a228ca059a668071cfeaa0f40c36c0b1585d45", size = 144741, upload-time = "2026-01-19T06:47:27.985Z" }, - { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" }, - { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = 
"2026-01-19T06:47:33.711Z" }, - { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" }, - { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" }, - { url = "https://files.pythonhosted.org/packages/a0/61/af9115673a5870fd885247e2f1b68c4f1197737da315b520a91c757a861a/multiprocess-0.70.19-py314-none-any.whl", hash = "sha256:e8cc7fbdff15c0613f0a1f1f8744bef961b0a164c0ca29bdff53e9d2d93c5e5f", size = 160318, upload-time = "2026-01-19T06:47:37.497Z" }, - { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" }, -] - -[[package]] -name = "narwhals" -version = "2.18.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/59/96/45218c2fdec4c9f22178f905086e85ef1a6d63862dcc3cd68eb60f1867f5/narwhals-2.18.1.tar.gz", hash = "sha256:652a1fcc9d432bbf114846688884c215f17eb118aa640b7419295d2f910d2a8b", size = 620578, upload-time = "2026-03-24T15:11:25.456Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/c3/06490e98393dcb4d6ce2bf331a39335375c300afaef526897881fbeae6ab/narwhals-2.18.1-py3-none-any.whl", hash = "sha256:a0a8bb80205323851338888ba3a12b4f65d352362c8a94be591244faf36504ad", size = 444952, upload-time = "2026-03-24T15:11:23.801Z" }, -] - [[package]] name = "nest-asyncio" version = "1.6.0" @@ 
-3298,21 +2600,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, ] -[[package]] -name = "nltk" -version = "3.9.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "joblib" }, - { name = "regex" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/74/a1/b3b4adf15585a5bc4c357adde150c01ebeeb642173ded4d871e89468767c/nltk-3.9.4.tar.gz", hash = "sha256:ed03bc098a40481310320808b2db712d95d13ca65b27372f8a403949c8b523d0", size = 2946864, upload-time = "2026-03-24T06:13:40.641Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/91/04e965f8e717ba0ab4bdca5c112deeab11c9e750d94c4d4602f050295d39/nltk-3.9.4-py3-none-any.whl", hash = "sha256:f2fa301c3a12718ce4a0e9305c5675299da5ad9e26068218b69d692fda84828f", size = 1552087, upload-time = "2026-03-24T06:13:38.47Z" }, -] - [[package]] name = "numpy" version = "2.4.3" @@ -3651,60 +2938,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, ] -[[package]] -name = "pandas" -version = "2.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = 
"2025-09-29T23:34:51.853Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/fa/7ac648108144a095b4fb6aa3de1954689f7af60a14cf25583f4960ecb878/pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523", size = 11578790, upload-time = "2025-09-29T23:18:30.065Z" }, - { url = "https://files.pythonhosted.org/packages/9b/35/74442388c6cf008882d4d4bdfc4109be87e9b8b7ccd097ad1e7f006e2e95/pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45", size = 10833831, upload-time = "2025-09-29T23:38:56.071Z" }, - { url = "https://files.pythonhosted.org/packages/fe/e4/de154cbfeee13383ad58d23017da99390b91d73f8c11856f2095e813201b/pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66", size = 12199267, upload-time = "2025-09-29T23:18:41.627Z" }, - { url = "https://files.pythonhosted.org/packages/bf/c9/63f8d545568d9ab91476b1818b4741f521646cbdd151c6efebf40d6de6f7/pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b", size = 12789281, upload-time = "2025-09-29T23:18:56.834Z" }, - { url = "https://files.pythonhosted.org/packages/f2/00/a5ac8c7a0e67fd1a6059e40aa08fa1c52cc00709077d2300e210c3ce0322/pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791", size = 13240453, upload-time = "2025-09-29T23:19:09.247Z" }, - { url = "https://files.pythonhosted.org/packages/27/4d/5c23a5bc7bd209231618dd9e606ce076272c9bc4f12023a70e03a86b4067/pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151", size = 13890361, upload-time = "2025-09-29T23:19:25.342Z" }, - { url = 
"https://files.pythonhosted.org/packages/8e/59/712db1d7040520de7a4965df15b774348980e6df45c129b8c64d0dbe74ef/pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c", size = 11348702, upload-time = "2025-09-29T23:19:38.296Z" }, - { url = "https://files.pythonhosted.org/packages/9c/fb/231d89e8637c808b997d172b18e9d4a4bc7bf31296196c260526055d1ea0/pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53", size = 11597846, upload-time = "2025-09-29T23:19:48.856Z" }, - { url = "https://files.pythonhosted.org/packages/5c/bd/bf8064d9cfa214294356c2d6702b716d3cf3bb24be59287a6a21e24cae6b/pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35", size = 10729618, upload-time = "2025-09-29T23:39:08.659Z" }, - { url = "https://files.pythonhosted.org/packages/57/56/cf2dbe1a3f5271370669475ead12ce77c61726ffd19a35546e31aa8edf4e/pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908", size = 11737212, upload-time = "2025-09-29T23:19:59.765Z" }, - { url = "https://files.pythonhosted.org/packages/e5/63/cd7d615331b328e287d8233ba9fdf191a9c2d11b6af0c7a59cfcec23de68/pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89", size = 12362693, upload-time = "2025-09-29T23:20:14.098Z" }, - { url = "https://files.pythonhosted.org/packages/a6/de/8b1895b107277d52f2b42d3a6806e69cfef0d5cf1d0ba343470b9d8e0a04/pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98", size = 12771002, upload-time = "2025-09-29T23:20:26.76Z" }, - { url = 
"https://files.pythonhosted.org/packages/87/21/84072af3187a677c5893b170ba2c8fbe450a6ff911234916da889b698220/pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084", size = 13450971, upload-time = "2025-09-29T23:20:41.344Z" }, - { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" }, - { url = "https://files.pythonhosted.org/packages/cd/4b/18b035ee18f97c1040d94debd8f2e737000ad70ccc8f5513f4eefad75f4b/pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713", size = 11544671, upload-time = "2025-09-29T23:21:05.024Z" }, - { url = "https://files.pythonhosted.org/packages/31/94/72fac03573102779920099bcac1c3b05975c2cb5f01eac609faf34bed1ca/pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8", size = 10680807, upload-time = "2025-09-29T23:21:15.979Z" }, - { url = "https://files.pythonhosted.org/packages/16/87/9472cf4a487d848476865321de18cc8c920b8cab98453ab79dbbc98db63a/pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d", size = 11709872, upload-time = "2025-09-29T23:21:27.165Z" }, - { url = "https://files.pythonhosted.org/packages/15/07/284f757f63f8a8d69ed4472bfd85122bd086e637bf4ed09de572d575a693/pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac", size = 12306371, upload-time = "2025-09-29T23:21:40.532Z" }, - { url = 
"https://files.pythonhosted.org/packages/33/81/a3afc88fca4aa925804a27d2676d22dcd2031c2ebe08aabd0ae55b9ff282/pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c", size = 12765333, upload-time = "2025-09-29T23:21:55.77Z" }, - { url = "https://files.pythonhosted.org/packages/8d/0f/b4d4ae743a83742f1153464cf1a8ecfafc3ac59722a0b5c8602310cb7158/pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493", size = 13418120, upload-time = "2025-09-29T23:22:10.109Z" }, - { url = "https://files.pythonhosted.org/packages/4f/c7/e54682c96a895d0c808453269e0b5928a07a127a15704fedb643e9b0a4c8/pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee", size = 10993991, upload-time = "2025-09-29T23:25:04.889Z" }, - { url = "https://files.pythonhosted.org/packages/f9/ca/3f8d4f49740799189e1395812f3bf23b5e8fc7c190827d55a610da72ce55/pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5", size = 12048227, upload-time = "2025-09-29T23:22:24.343Z" }, - { url = "https://files.pythonhosted.org/packages/0e/5a/f43efec3e8c0cc92c4663ccad372dbdff72b60bdb56b2749f04aa1d07d7e/pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21", size = 11411056, upload-time = "2025-09-29T23:22:37.762Z" }, - { url = "https://files.pythonhosted.org/packages/46/b1/85331edfc591208c9d1a63a06baa67b21d332e63b7a591a5ba42a10bb507/pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78", size = 11645189, upload-time = "2025-09-29T23:22:51.688Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/23/78d645adc35d94d1ac4f2a3c4112ab6f5b8999f4898b8cdf01252f8df4a9/pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110", size = 12121912, upload-time = "2025-09-29T23:23:05.042Z" }, - { url = "https://files.pythonhosted.org/packages/53/da/d10013df5e6aaef6b425aa0c32e1fc1f3e431e4bcabd420517dceadce354/pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86", size = 12712160, upload-time = "2025-09-29T23:23:28.57Z" }, - { url = "https://files.pythonhosted.org/packages/bd/17/e756653095a083d8a37cbd816cb87148debcfcd920129b25f99dd8d04271/pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc", size = 13199233, upload-time = "2025-09-29T23:24:24.876Z" }, - { url = "https://files.pythonhosted.org/packages/04/fd/74903979833db8390b73b3a8a7d30d146d710bd32703724dd9083950386f/pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0", size = 11540635, upload-time = "2025-09-29T23:25:52.486Z" }, - { url = "https://files.pythonhosted.org/packages/21/00/266d6b357ad5e6d3ad55093a7e8efc7dd245f5a842b584db9f30b0f0a287/pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593", size = 10759079, upload-time = "2025-09-29T23:26:33.204Z" }, - { url = "https://files.pythonhosted.org/packages/ca/05/d01ef80a7a3a12b2f8bbf16daba1e17c98a2f039cbc8e2f77a2c5a63d382/pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c", size = 11814049, upload-time = "2025-09-29T23:27:15.384Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/b2/0e62f78c0c5ba7e3d2c5945a82456f4fac76c480940f805e0b97fcbc2f65/pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b", size = 12332638, upload-time = "2025-09-29T23:27:51.625Z" }, - { url = "https://files.pythonhosted.org/packages/c5/33/dd70400631b62b9b29c3c93d2feee1d0964dc2bae2e5ad7a6c73a7f25325/pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6", size = 12886834, upload-time = "2025-09-29T23:28:21.289Z" }, - { url = "https://files.pythonhosted.org/packages/d3/18/b5d48f55821228d0d2692b34fd5034bb185e854bdb592e9c640f6290e012/pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3", size = 13409925, upload-time = "2025-09-29T23:28:58.261Z" }, - { url = "https://files.pythonhosted.org/packages/a6/3d/124ac75fcd0ecc09b8fdccb0246ef65e35b012030defb0e0eba2cbbbe948/pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5", size = 11109071, upload-time = "2025-09-29T23:32:27.484Z" }, - { url = "https://files.pythonhosted.org/packages/89/9c/0e21c895c38a157e0faa1fb64587a9226d6dd46452cac4532d80c3c4a244/pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec", size = 12048504, upload-time = "2025-09-29T23:29:31.47Z" }, - { url = "https://files.pythonhosted.org/packages/d7/82/b69a1c95df796858777b68fbe6a81d37443a33319761d7c652ce77797475/pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7", size = 11410702, upload-time = "2025-09-29T23:29:54.591Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/88/702bde3ba0a94b8c73a0181e05144b10f13f29ebfc2150c3a79062a8195d/pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450", size = 11634535, upload-time = "2025-09-29T23:30:21.003Z" }, - { url = "https://files.pythonhosted.org/packages/a4/1e/1bac1a839d12e6a82ec6cb40cda2edde64a2013a66963293696bbf31fbbb/pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5", size = 12121582, upload-time = "2025-09-29T23:30:43.391Z" }, - { url = "https://files.pythonhosted.org/packages/44/91/483de934193e12a3b1d6ae7c8645d083ff88dec75f46e827562f1e4b4da6/pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788", size = 12699963, upload-time = "2025-09-29T23:31:10.009Z" }, - { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" }, -] - [[package]] name = "parallel-web" version = "0.4.2" @@ -3722,115 +2955,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/3e/2218fa29637781b8e7ac35a928108ff2614ddd40879389d3af2caa725af5/parallel_web-0.4.2-py3-none-any.whl", hash = "sha256:aa3a4a9aecc08972c5ce9303271d4917903373dff4dd277d9a3e30f9cff53346", size = 144012, upload-time = "2026-03-09T22:24:33.979Z" }, ] -[[package]] -name = "pillow" -version = "12.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = 
"sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/46/5da1ec4a5171ee7bf1a0efa064aba70ba3d6e0788ce3f5acd1375d23c8c0/pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32", size = 5304084, upload-time = "2026-02-11T04:20:27.501Z" }, - { url = "https://files.pythonhosted.org/packages/78/93/a29e9bc02d1cf557a834da780ceccd54e02421627200696fcf805ebdc3fb/pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38", size = 4657866, upload-time = "2026-02-11T04:20:29.827Z" }, - { url = "https://files.pythonhosted.org/packages/13/84/583a4558d492a179d31e4aae32eadce94b9acf49c0337c4ce0b70e0a01f2/pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5", size = 6232148, upload-time = "2026-02-11T04:20:31.329Z" }, - { url = "https://files.pythonhosted.org/packages/d5/e2/53c43334bbbb2d3b938978532fbda8e62bb6e0b23a26ce8592f36bcc4987/pillow-12.1.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc354a04072b765eccf2204f588a7a532c9511e8b9c7f900e1b64e3e33487090", size = 8038007, upload-time = "2026-02-11T04:20:34.225Z" }, - { url = "https://files.pythonhosted.org/packages/b8/a6/3d0e79c8a9d58150dd98e199d7c1c56861027f3829a3a60b3c2784190180/pillow-12.1.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e7976bf1910a8116b523b9f9f58bf410f3e8aa330cd9a2bb2953f9266ab49af", size = 6345418, upload-time = "2026-02-11T04:20:35.858Z" }, - { url = "https://files.pythonhosted.org/packages/a2/c8/46dfeac5825e600579157eea177be43e2f7ff4a99da9d0d0a49533509ac5/pillow-12.1.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:597bd9c8419bc7c6af5604e55847789b69123bbe25d65cc6ad3012b4f3c98d8b", size = 7034590, upload-time = "2026-02-11T04:20:37.91Z" }, - { url = "https://files.pythonhosted.org/packages/af/bf/e6f65d3db8a8bbfeaf9e13cc0417813f6319863a73de934f14b2229ada18/pillow-12.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2c1fc0f2ca5f96a3c8407e41cca26a16e46b21060fe6d5b099d2cb01412222f5", size = 6458655, upload-time = "2026-02-11T04:20:39.496Z" }, - { url = "https://files.pythonhosted.org/packages/f9/c2/66091f3f34a25894ca129362e510b956ef26f8fb67a0e6417bc5744e56f1/pillow-12.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:578510d88c6229d735855e1f278aa305270438d36a05031dfaae5067cc8eb04d", size = 7159286, upload-time = "2026-02-11T04:20:41.139Z" }, - { url = "https://files.pythonhosted.org/packages/7b/5a/24bc8eb526a22f957d0cec6243146744966d40857e3d8deb68f7902ca6c1/pillow-12.1.1-cp311-cp311-win32.whl", hash = "sha256:7311c0a0dcadb89b36b7025dfd8326ecfa36964e29913074d47382706e516a7c", size = 6328663, upload-time = "2026-02-11T04:20:43.184Z" }, - { url = "https://files.pythonhosted.org/packages/31/03/bef822e4f2d8f9d7448c133d0a18185d3cce3e70472774fffefe8b0ed562/pillow-12.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:fbfa2a7c10cc2623f412753cddf391c7f971c52ca40a3f65dc5039b2939e8563", size = 7031448, upload-time = "2026-02-11T04:20:44.696Z" }, - { url = "https://files.pythonhosted.org/packages/49/70/f76296f53610bd17b2e7d31728b8b7825e3ac3b5b3688b51f52eab7c0818/pillow-12.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:b81b5e3511211631b3f672a595e3221252c90af017e399056d0faabb9538aa80", size = 2453651, upload-time = "2026-02-11T04:20:46.243Z" }, - { url = "https://files.pythonhosted.org/packages/07/d3/8df65da0d4df36b094351dce696f2989bec731d4f10e743b1c5f4da4d3bf/pillow-12.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ab323b787d6e18b3d91a72fc99b1a2c28651e4358749842b8f8dfacd28ef2052", size = 5262803, upload-time = "2026-02-11T04:20:47.653Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/71/5026395b290ff404b836e636f51d7297e6c83beceaa87c592718747e670f/pillow-12.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adebb5bee0f0af4909c30db0d890c773d1a92ffe83da908e2e9e720f8edf3984", size = 4657601, upload-time = "2026-02-11T04:20:49.328Z" }, - { url = "https://files.pythonhosted.org/packages/b1/2e/1001613d941c67442f745aff0f7cc66dd8df9a9c084eb497e6a543ee6f7e/pillow-12.1.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bb66b7cc26f50977108790e2456b7921e773f23db5630261102233eb355a3b79", size = 6234995, upload-time = "2026-02-11T04:20:51.032Z" }, - { url = "https://files.pythonhosted.org/packages/07/26/246ab11455b2549b9233dbd44d358d033a2f780fa9007b61a913c5b2d24e/pillow-12.1.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aee2810642b2898bb187ced9b349e95d2a7272930796e022efaf12e99dccd293", size = 8045012, upload-time = "2026-02-11T04:20:52.882Z" }, - { url = "https://files.pythonhosted.org/packages/b2/8b/07587069c27be7535ac1fe33874e32de118fbd34e2a73b7f83436a88368c/pillow-12.1.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a0b1cd6232e2b618adcc54d9882e4e662a089d5768cd188f7c245b4c8c44a397", size = 6349638, upload-time = "2026-02-11T04:20:54.444Z" }, - { url = "https://files.pythonhosted.org/packages/ff/79/6df7b2ee763d619cda2fb4fea498e5f79d984dae304d45a8999b80d6cf5c/pillow-12.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7aac39bcf8d4770d089588a2e1dd111cbaa42df5a94be3114222057d68336bd0", size = 7041540, upload-time = "2026-02-11T04:20:55.97Z" }, - { url = "https://files.pythonhosted.org/packages/2c/5e/2ba19e7e7236d7529f4d873bdaf317a318896bac289abebd4bb00ef247f0/pillow-12.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ab174cd7d29a62dd139c44bf74b698039328f45cb03b4596c43473a46656b2f3", size = 6462613, upload-time = "2026-02-11T04:20:57.542Z" }, - { url = 
"https://files.pythonhosted.org/packages/03/03/31216ec124bb5c3dacd74ce8efff4cc7f52643653bad4825f8f08c697743/pillow-12.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:339ffdcb7cbeaa08221cd401d517d4b1fe7a9ed5d400e4a8039719238620ca35", size = 7166745, upload-time = "2026-02-11T04:20:59.196Z" }, - { url = "https://files.pythonhosted.org/packages/1f/e7/7c4552d80052337eb28653b617eafdef39adfb137c49dd7e831b8dc13bc5/pillow-12.1.1-cp312-cp312-win32.whl", hash = "sha256:5d1f9575a12bed9e9eedd9a4972834b08c97a352bd17955ccdebfeca5913fa0a", size = 6328823, upload-time = "2026-02-11T04:21:01.385Z" }, - { url = "https://files.pythonhosted.org/packages/3d/17/688626d192d7261bbbf98846fc98995726bddc2c945344b65bec3a29d731/pillow-12.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:21329ec8c96c6e979cd0dfd29406c40c1d52521a90544463057d2aaa937d66a6", size = 7033367, upload-time = "2026-02-11T04:21:03.536Z" }, - { url = "https://files.pythonhosted.org/packages/ed/fe/a0ef1f73f939b0eca03ee2c108d0043a87468664770612602c63266a43c4/pillow-12.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:af9a332e572978f0218686636610555ae3defd1633597be015ed50289a03c523", size = 2453811, upload-time = "2026-02-11T04:21:05.116Z" }, - { url = "https://files.pythonhosted.org/packages/d5/11/6db24d4bd7685583caeae54b7009584e38da3c3d4488ed4cd25b439de486/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:d242e8ac078781f1de88bf823d70c1a9b3c7950a44cdf4b7c012e22ccbcd8e4e", size = 4062689, upload-time = "2026-02-11T04:21:06.804Z" }, - { url = "https://files.pythonhosted.org/packages/33/c0/ce6d3b1fe190f0021203e0d9b5b99e57843e345f15f9ef22fcd43842fd21/pillow-12.1.1-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:02f84dfad02693676692746df05b89cf25597560db2857363a208e393429f5e9", size = 4138535, upload-time = "2026-02-11T04:21:08.452Z" }, - { url = 
"https://files.pythonhosted.org/packages/a0/c6/d5eb6a4fb32a3f9c21a8c7613ec706534ea1cf9f4b3663e99f0d83f6fca8/pillow-12.1.1-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:e65498daf4b583091ccbb2556c7000abf0f3349fcd57ef7adc9a84a394ed29f6", size = 3601364, upload-time = "2026-02-11T04:21:10.194Z" }, - { url = "https://files.pythonhosted.org/packages/14/a1/16c4b823838ba4c9c52c0e6bbda903a3fe5a1bdbf1b8eb4fff7156f3e318/pillow-12.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c6db3b84c87d48d0088943bf33440e0c42370b99b1c2a7989216f7b42eede60", size = 5262561, upload-time = "2026-02-11T04:21:11.742Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ad/ad9dc98ff24f485008aa5cdedaf1a219876f6f6c42a4626c08bc4e80b120/pillow-12.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8b7e5304e34942bf62e15184219a7b5ad4ff7f3bb5cca4d984f37df1a0e1aee2", size = 4657460, upload-time = "2026-02-11T04:21:13.786Z" }, - { url = "https://files.pythonhosted.org/packages/9e/1b/f1a4ea9a895b5732152789326202a82464d5254759fbacae4deea3069334/pillow-12.1.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:18e5bddd742a44b7e6b1e773ab5db102bd7a94c32555ba656e76d319d19c3850", size = 6232698, upload-time = "2026-02-11T04:21:15.949Z" }, - { url = "https://files.pythonhosted.org/packages/95/f4/86f51b8745070daf21fd2e5b1fe0eb35d4db9ca26e6d58366562fb56a743/pillow-12.1.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc44ef1f3de4f45b50ccf9136999d71abb99dca7706bc75d222ed350b9fd2289", size = 8041706, upload-time = "2026-02-11T04:21:17.723Z" }, - { url = "https://files.pythonhosted.org/packages/29/9b/d6ecd956bb1266dd1045e995cce9b8d77759e740953a1c9aad9502a0461e/pillow-12.1.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5a8eb7ed8d4198bccbd07058416eeec51686b498e784eda166395a23eb99138e", size = 6346621, upload-time = "2026-02-11T04:21:19.547Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/24/538bff45bde96535d7d998c6fed1a751c75ac7c53c37c90dc2601b243893/pillow-12.1.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47b94983da0c642de92ced1702c5b6c292a84bd3a8e1d1702ff923f183594717", size = 7038069, upload-time = "2026-02-11T04:21:21.378Z" }, - { url = "https://files.pythonhosted.org/packages/94/0e/58cb1a6bc48f746bc4cb3adb8cabff73e2742c92b3bf7a220b7cf69b9177/pillow-12.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:518a48c2aab7ce596d3bf79d0e275661b846e86e4d0e7dec34712c30fe07f02a", size = 6460040, upload-time = "2026-02-11T04:21:23.148Z" }, - { url = "https://files.pythonhosted.org/packages/6c/57/9045cb3ff11eeb6c1adce3b2d60d7d299d7b273a2e6c8381a524abfdc474/pillow-12.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a550ae29b95c6dc13cf69e2c9dc5747f814c54eeb2e32d683e5e93af56caa029", size = 7164523, upload-time = "2026-02-11T04:21:25.01Z" }, - { url = "https://files.pythonhosted.org/packages/73/f2/9be9cb99f2175f0d4dbadd6616ce1bf068ee54a28277ea1bf1fbf729c250/pillow-12.1.1-cp313-cp313-win32.whl", hash = "sha256:a003d7422449f6d1e3a34e3dd4110c22148336918ddbfc6a32581cd54b2e0b2b", size = 6332552, upload-time = "2026-02-11T04:21:27.238Z" }, - { url = "https://files.pythonhosted.org/packages/3f/eb/b0834ad8b583d7d9d42b80becff092082a1c3c156bb582590fcc973f1c7c/pillow-12.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:344cf1e3dab3be4b1fa08e449323d98a2a3f819ad20f4b22e77a0ede31f0faa1", size = 7040108, upload-time = "2026-02-11T04:21:29.462Z" }, - { url = "https://files.pythonhosted.org/packages/d5/7d/fc09634e2aabdd0feabaff4a32f4a7d97789223e7c2042fd805ea4b4d2c2/pillow-12.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:5c0dd1636633e7e6a0afe7bf6a51a14992b7f8e60de5789018ebbdfae55b040a", size = 2453712, upload-time = "2026-02-11T04:21:31.072Z" }, - { url = 
"https://files.pythonhosted.org/packages/19/2a/b9d62794fc8a0dd14c1943df68347badbd5511103e0d04c035ffe5cf2255/pillow-12.1.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0330d233c1a0ead844fc097a7d16c0abff4c12e856c0b325f231820fee1f39da", size = 5264880, upload-time = "2026-02-11T04:21:32.865Z" }, - { url = "https://files.pythonhosted.org/packages/26/9d/e03d857d1347fa5ed9247e123fcd2a97b6220e15e9cb73ca0a8d91702c6e/pillow-12.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5dae5f21afb91322f2ff791895ddd8889e5e947ff59f71b46041c8ce6db790bc", size = 4660616, upload-time = "2026-02-11T04:21:34.97Z" }, - { url = "https://files.pythonhosted.org/packages/f7/ec/8a6d22afd02570d30954e043f09c32772bfe143ba9285e2fdb11284952cd/pillow-12.1.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2e0c664be47252947d870ac0d327fea7e63985a08794758aa8af5b6cb6ec0c9c", size = 6269008, upload-time = "2026-02-11T04:21:36.623Z" }, - { url = "https://files.pythonhosted.org/packages/3d/1d/6d875422c9f28a4a361f495a5f68d9de4a66941dc2c619103ca335fa6446/pillow-12.1.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:691ab2ac363b8217f7d31b3497108fb1f50faab2f75dfb03284ec2f217e87bf8", size = 8073226, upload-time = "2026-02-11T04:21:38.585Z" }, - { url = "https://files.pythonhosted.org/packages/a1/cd/134b0b6ee5eda6dc09e25e24b40fdafe11a520bc725c1d0bbaa5e00bf95b/pillow-12.1.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e9e8064fb1cc019296958595f6db671fba95209e3ceb0c4734c9baf97de04b20", size = 6380136, upload-time = "2026-02-11T04:21:40.562Z" }, - { url = "https://files.pythonhosted.org/packages/7a/a9/7628f013f18f001c1b98d8fffe3452f306a70dc6aba7d931019e0492f45e/pillow-12.1.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:472a8d7ded663e6162dafdf20015c486a7009483ca671cece7a9279b512fcb13", size = 7067129, upload-time = "2026-02-11T04:21:42.521Z" }, - { url = 
"https://files.pythonhosted.org/packages/1e/f8/66ab30a2193b277785601e82ee2d49f68ea575d9637e5e234faaa98efa4c/pillow-12.1.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:89b54027a766529136a06cfebeecb3a04900397a3590fd252160b888479517bf", size = 6491807, upload-time = "2026-02-11T04:21:44.22Z" }, - { url = "https://files.pythonhosted.org/packages/da/0b/a877a6627dc8318fdb84e357c5e1a758c0941ab1ddffdafd231983788579/pillow-12.1.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:86172b0831b82ce4f7877f280055892b31179e1576aa00d0df3bb1bbf8c3e524", size = 7190954, upload-time = "2026-02-11T04:21:46.114Z" }, - { url = "https://files.pythonhosted.org/packages/83/43/6f732ff85743cf746b1361b91665d9f5155e1483817f693f8d57ea93147f/pillow-12.1.1-cp313-cp313t-win32.whl", hash = "sha256:44ce27545b6efcf0fdbdceb31c9a5bdea9333e664cda58a7e674bb74608b3986", size = 6336441, upload-time = "2026-02-11T04:21:48.22Z" }, - { url = "https://files.pythonhosted.org/packages/3b/44/e865ef3986611bb75bfabdf94a590016ea327833f434558801122979cd0e/pillow-12.1.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a285e3eb7a5a45a2ff504e31f4a8d1b12ef62e84e5411c6804a42197c1cf586c", size = 7045383, upload-time = "2026-02-11T04:21:50.015Z" }, - { url = "https://files.pythonhosted.org/packages/a8/c6/f4fb24268d0c6908b9f04143697ea18b0379490cb74ba9e8d41b898bd005/pillow-12.1.1-cp313-cp313t-win_arm64.whl", hash = "sha256:cc7d296b5ea4d29e6570dabeaed58d31c3fea35a633a69679fb03d7664f43fb3", size = 2456104, upload-time = "2026-02-11T04:21:51.633Z" }, - { url = "https://files.pythonhosted.org/packages/03/d0/bebb3ffbf31c5a8e97241476c4cf8b9828954693ce6744b4a2326af3e16b/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:417423db963cb4be8bac3fc1204fe61610f6abeed1580a7a2cbb2fbda20f12af", size = 4062652, upload-time = "2026-02-11T04:21:53.19Z" }, - { url = 
"https://files.pythonhosted.org/packages/2d/c0/0e16fb0addda4851445c28f8350d8c512f09de27bbb0d6d0bbf8b6709605/pillow-12.1.1-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:b957b71c6b2387610f556a7eb0828afbe40b4a98036fc0d2acfa5a44a0c2036f", size = 4138823, upload-time = "2026-02-11T04:22:03.088Z" }, - { url = "https://files.pythonhosted.org/packages/6b/fb/6170ec655d6f6bb6630a013dd7cf7bc218423d7b5fa9071bf63dc32175ae/pillow-12.1.1-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:097690ba1f2efdeb165a20469d59d8bb03c55fb6621eb2041a060ae8ea3e9642", size = 3601143, upload-time = "2026-02-11T04:22:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/59/04/dc5c3f297510ba9a6837cbb318b87dd2b8f73eb41a43cc63767f65cb599c/pillow-12.1.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:2815a87ab27848db0321fb78c7f0b2c8649dee134b7f2b80c6a45c6831d75ccd", size = 5266254, upload-time = "2026-02-11T04:22:07.656Z" }, - { url = "https://files.pythonhosted.org/packages/05/30/5db1236b0d6313f03ebf97f5e17cda9ca060f524b2fcc875149a8360b21c/pillow-12.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:f7ed2c6543bad5a7d5530eb9e78c53132f93dfa44a28492db88b41cdab885202", size = 4657499, upload-time = "2026-02-11T04:22:09.613Z" }, - { url = "https://files.pythonhosted.org/packages/6f/18/008d2ca0eb612e81968e8be0bbae5051efba24d52debf930126d7eaacbba/pillow-12.1.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:652a2c9ccfb556235b2b501a3a7cf3742148cd22e04b5625c5fe057ea3e3191f", size = 6232137, upload-time = "2026-02-11T04:22:11.434Z" }, - { url = "https://files.pythonhosted.org/packages/70/f1/f14d5b8eeb4b2cd62b9f9f847eb6605f103df89ef619ac68f92f748614ea/pillow-12.1.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6e4571eedf43af33d0fc233a382a76e849badbccdf1ac438841308652a08e1f", size = 8042721, upload-time = "2026-02-11T04:22:13.321Z" }, - { url = 
"https://files.pythonhosted.org/packages/5a/d6/17824509146e4babbdabf04d8171491fa9d776f7061ff6e727522df9bd03/pillow-12.1.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b574c51cf7d5d62e9be37ba446224b59a2da26dc4c1bb2ecbe936a4fb1a7cb7f", size = 6347798, upload-time = "2026-02-11T04:22:15.449Z" }, - { url = "https://files.pythonhosted.org/packages/d1/ee/c85a38a9ab92037a75615aba572c85ea51e605265036e00c5b67dfafbfe2/pillow-12.1.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a37691702ed687799de29a518d63d4682d9016932db66d4e90c345831b02fb4e", size = 7039315, upload-time = "2026-02-11T04:22:17.24Z" }, - { url = "https://files.pythonhosted.org/packages/ec/f3/bc8ccc6e08a148290d7523bde4d9a0d6c981db34631390dc6e6ec34cacf6/pillow-12.1.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f95c00d5d6700b2b890479664a06e754974848afaae5e21beb4d83c106923fd0", size = 6462360, upload-time = "2026-02-11T04:22:19.111Z" }, - { url = "https://files.pythonhosted.org/packages/f6/ab/69a42656adb1d0665ab051eec58a41f169ad295cf81ad45406963105408f/pillow-12.1.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:559b38da23606e68681337ad74622c4dbba02254fc9cb4488a305dd5975c7eeb", size = 7165438, upload-time = "2026-02-11T04:22:21.041Z" }, - { url = "https://files.pythonhosted.org/packages/02/46/81f7aa8941873f0f01d4b55cc543b0a3d03ec2ee30d617a0448bf6bd6dec/pillow-12.1.1-cp314-cp314-win32.whl", hash = "sha256:03edcc34d688572014ff223c125a3f77fb08091e4607e7745002fc214070b35f", size = 6431503, upload-time = "2026-02-11T04:22:22.833Z" }, - { url = "https://files.pythonhosted.org/packages/40/72/4c245f7d1044b67affc7f134a09ea619d4895333d35322b775b928180044/pillow-12.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:50480dcd74fa63b8e78235957d302d98d98d82ccbfac4c7e12108ba9ecbdba15", size = 7176748, upload-time = "2026-02-11T04:22:24.64Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/ad/8a87bdbe038c5c698736e3348af5c2194ffb872ea52f11894c95f9305435/pillow-12.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:5cb1785d97b0c3d1d1a16bc1d710c4a0049daefc4935f3a8f31f827f4d3d2e7f", size = 2544314, upload-time = "2026-02-11T04:22:26.685Z" }, - { url = "https://files.pythonhosted.org/packages/6c/9d/efd18493f9de13b87ede7c47e69184b9e859e4427225ea962e32e56a49bc/pillow-12.1.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1f90cff8aa76835cba5769f0b3121a22bd4eb9e6884cfe338216e557a9a548b8", size = 5268612, upload-time = "2026-02-11T04:22:29.884Z" }, - { url = "https://files.pythonhosted.org/packages/f8/f1/4f42eb2b388eb2ffc660dcb7f7b556c1015c53ebd5f7f754965ef997585b/pillow-12.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1f1be78ce9466a7ee64bfda57bdba0f7cc499d9794d518b854816c41bf0aa4e9", size = 4660567, upload-time = "2026-02-11T04:22:31.799Z" }, - { url = "https://files.pythonhosted.org/packages/01/54/df6ef130fa43e4b82e32624a7b821a2be1c5653a5fdad8469687a7db4e00/pillow-12.1.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:42fc1f4677106188ad9a55562bbade416f8b55456f522430fadab3cef7cd4e60", size = 6269951, upload-time = "2026-02-11T04:22:33.921Z" }, - { url = "https://files.pythonhosted.org/packages/a9/48/618752d06cc44bb4aae8ce0cd4e6426871929ed7b46215638088270d9b34/pillow-12.1.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98edb152429ab62a1818039744d8fbb3ccab98a7c29fc3d5fcef158f3f1f68b7", size = 8074769, upload-time = "2026-02-11T04:22:35.877Z" }, - { url = "https://files.pythonhosted.org/packages/c3/bd/f1d71eb39a72fa088d938655afba3e00b38018d052752f435838961127d8/pillow-12.1.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d470ab1178551dd17fdba0fef463359c41aaa613cdcd7ff8373f54be629f9f8f", size = 6381358, upload-time = "2026-02-11T04:22:37.698Z" }, - { url = 
"https://files.pythonhosted.org/packages/64/ef/c784e20b96674ed36a5af839305f55616f8b4f8aa8eeccf8531a6e312243/pillow-12.1.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6408a7b064595afcab0a49393a413732a35788f2a5092fdc6266952ed67de586", size = 7068558, upload-time = "2026-02-11T04:22:39.597Z" }, - { url = "https://files.pythonhosted.org/packages/73/cb/8059688b74422ae61278202c4e1ad992e8a2e7375227be0a21c6b87ca8d5/pillow-12.1.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5d8c41325b382c07799a3682c1c258469ea2ff97103c53717b7893862d0c98ce", size = 6493028, upload-time = "2026-02-11T04:22:42.73Z" }, - { url = "https://files.pythonhosted.org/packages/c6/da/e3c008ed7d2dd1f905b15949325934510b9d1931e5df999bb15972756818/pillow-12.1.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c7697918b5be27424e9ce568193efd13d925c4481dd364e43f5dff72d33e10f8", size = 7191940, upload-time = "2026-02-11T04:22:44.543Z" }, - { url = "https://files.pythonhosted.org/packages/01/4a/9202e8d11714c1fc5951f2e1ef362f2d7fbc595e1f6717971d5dd750e969/pillow-12.1.1-cp314-cp314t-win32.whl", hash = "sha256:d2912fd8114fc5545aa3a4b5576512f64c55a03f3ebcca4c10194d593d43ea36", size = 6438736, upload-time = "2026-02-11T04:22:46.347Z" }, - { url = "https://files.pythonhosted.org/packages/f3/ca/cbce2327eb9885476b3957b2e82eb12c866a8b16ad77392864ad601022ce/pillow-12.1.1-cp314-cp314t-win_amd64.whl", hash = "sha256:4ceb838d4bd9dab43e06c363cab2eebf63846d6a4aeaea283bbdfd8f1a8ed58b", size = 7182894, upload-time = "2026-02-11T04:22:48.114Z" }, - { url = "https://files.pythonhosted.org/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334", size = 2546446, upload-time = "2026-02-11T04:22:50.342Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/11/5d43209aa4cb58e0cc80127956ff1796a68b928e6324bbf06ef4db34367b/pillow-12.1.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:600fd103672b925fe62ed08e0d874ea34d692474df6f4bf7ebe148b30f89f39f", size = 5228606, upload-time = "2026-02-11T04:22:52.106Z" }, - { url = "https://files.pythonhosted.org/packages/5f/d5/3b005b4e4fda6698b371fa6c21b097d4707585d7db99e98d9b0b87ac612a/pillow-12.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:665e1b916b043cef294bc54d47bf02d87e13f769bc4bc5fa225a24b3a6c5aca9", size = 4622321, upload-time = "2026-02-11T04:22:53.827Z" }, - { url = "https://files.pythonhosted.org/packages/df/36/ed3ea2d594356fd8037e5a01f6156c74bc8d92dbb0fa60746cc96cabb6e8/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:495c302af3aad1ca67420ddd5c7bd480c8867ad173528767d906428057a11f0e", size = 5247579, upload-time = "2026-02-11T04:22:56.094Z" }, - { url = "https://files.pythonhosted.org/packages/54/9a/9cc3e029683cf6d20ae5085da0dafc63148e3252c2f13328e553aaa13cfb/pillow-12.1.1-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8fd420ef0c52c88b5a035a0886f367748c72147b2b8f384c9d12656678dfdfa9", size = 6989094, upload-time = "2026-02-11T04:22:58.288Z" }, - { url = "https://files.pythonhosted.org/packages/00/98/fc53ab36da80b88df0967896b6c4b4cd948a0dc5aa40a754266aa3ae48b3/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f975aa7ef9684ce7e2c18a3aa8f8e2106ce1e46b94ab713d156b2898811651d3", size = 5313850, upload-time = "2026-02-11T04:23:00.554Z" }, - { url = "https://files.pythonhosted.org/packages/30/02/00fa585abfd9fe9d73e5f6e554dc36cc2b842898cbfc46d70353dae227f8/pillow-12.1.1-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8089c852a56c2966cf18835db62d9b34fef7ba74c726ad943928d494fa7f4735", size = 5963343, upload-time = "2026-02-11T04:23:02.934Z" }, 
- { url = "https://files.pythonhosted.org/packages/f2/26/c56ce33ca856e358d27fda9676c055395abddb82c35ac0f593877ed4562e/pillow-12.1.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:cb9bb857b2d057c6dfc72ac5f3b44836924ba15721882ef103cecb40d002d80e", size = 7029880, upload-time = "2026-02-11T04:23:04.783Z" }, -] - -[[package]] -name = "platformdirs" -version = "4.9.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1b/04/fea538adf7dbbd6d186f551d595961e564a3b6715bdf276b477460858672/platformdirs-4.9.2.tar.gz", hash = "sha256:9a33809944b9db043ad67ca0db94b14bf452cc6aeaac46a88ea55b26e2e9d291", size = 28394, upload-time = "2026-02-16T03:56:10.574Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" }, -] - -[[package]] -name = "plotly" -version = "6.6.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "narwhals", marker = "python_full_version >= '3.12'" }, - { name = "packaging", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/24/fb/41efe84970cfddefd4ccf025e2cbfafe780004555f583e93dba3dac2cdef/plotly-6.6.0.tar.gz", hash = "sha256:b897f15f3b02028d69f755f236be890ba950d0a42d7dfc619b44e2d8cea8748c", size = 7027956, upload-time = "2026-03-02T21:10:25.321Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/d2/c6e44dba74f17c6216ce1b56044a9b93a929f1c2d5bdaff892512b260f5e/plotly-6.6.0-py3-none-any.whl", hash = "sha256:8d6daf0f87412e0c0bfe72e809d615217ab57cc715899a1e5145135a7800d1d0", size = 9910315, upload-time = "2026-03-02T21:10:18.131Z" }, -] - [[package]] name = "pluggy" version = "1.6.0" @@ -3840,34 +2964,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] -[[package]] -name = "polars" -version = "1.39.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "polars-runtime-32" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/93/ab/f19e592fce9e000da49c96bf35e77cef67f9cb4b040bfa538a2764c0263e/polars-1.39.3.tar.gz", hash = "sha256:2e016c7f3e8d14fa777ef86fe0477cec6c67023a20ba4c94d6e8431eefe4a63c", size = 728987, upload-time = "2026-03-20T11:16:24.836Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/db/08f4ca10c5018813e7e0b59e4472302328b3d2ab1512f5a2157a814540e0/polars-1.39.3-py3-none-any.whl", hash = "sha256:c2b955ccc0a08a2bc9259785decf3d5c007b489b523bf2390cf21cec2bb82a56", size = 823985, upload-time = "2026-03-20T11:14:23.619Z" }, -] - -[[package]] -name = "polars-runtime-32" -version = "1.39.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/17/39/c8688696bc22b6c501e3b82ef3be10e543c07a785af5660f30997cd22dd2/polars_runtime_32-1.39.3.tar.gz", hash = "sha256:c728e4f469cafab501947585f36311b8fb222d3e934c6209e83791e0df20b29d", size = 2872335, upload-time = "2026-03-20T11:16:26.581Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/74/1b41205f7368c9375ab1dea91178eaa20435fe3eff036390a53a7660b416/polars_runtime_32-1.39.3-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:425c0b220b573fa097b4042edff73114cc6d23432a21dfd2dc41adf329d7d2e9", size = 45273243, upload-time = "2026-03-20T11:14:26.691Z" }, - { url = "https://files.pythonhosted.org/packages/90/bf/297716b3095fe719be20fcf7af1d2b6ab069c38199bbace2469608a69b3a/polars_runtime_32-1.39.3-cp310-abi3-macosx_11_0_arm64.whl", hash = 
"sha256:ef5884711e3c617d7dc93519a7d038e242f5741cfe5fe9afd32d58845d86c562", size = 40842924, upload-time = "2026-03-20T11:14:31.154Z" }, - { url = "https://files.pythonhosted.org/packages/3d/3e/e65236d9d0d9babfa0ecba593413c06530fca60a8feb8f66243aa5dba92e/polars_runtime_32-1.39.3-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06b47f535eb1f97a9a1e5b0053ef50db3a4276e241178e37bbb1a38b1fa53b14", size = 43220650, upload-time = "2026-03-20T11:14:35.458Z" }, - { url = "https://files.pythonhosted.org/packages/b0/15/fc3e43f3fdf3f20b7dfb5abe871ab6162cf8fb4aeabf4cfad822d5dc4c79/polars_runtime_32-1.39.3-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bc9e13dc1d2e828331f2fe8ccbc9757554dc4933a8d3e85e906b988178f95ed", size = 46877498, upload-time = "2026-03-20T11:14:40.14Z" }, - { url = "https://files.pythonhosted.org/packages/3c/81/bd5f895919e32c6ab0a7786cd0c0ca961cb03152c47c3645808b54383f31/polars_runtime_32-1.39.3-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:363d49e3a3e638fc943e2b9887940300a7d06789930855a178a4727949259dc2", size = 43380176, upload-time = "2026-03-20T11:14:45.566Z" }, - { url = "https://files.pythonhosted.org/packages/7a/3e/c86433c3b5ec0315bdfc7640d0c15d41f1216c0103a0eab9a9b5147d6c4c/polars_runtime_32-1.39.3-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7c206bdcc7bc62ea038d6adea8e44b02f0e675e0191a54c810703b4895208ea4", size = 46485933, upload-time = "2026-03-20T11:14:51.155Z" }, - { url = "https://files.pythonhosted.org/packages/54/ce/200b310cf91f98e652eb6ea09fdb3a9718aa0293ebf113dce325797c8572/polars_runtime_32-1.39.3-cp310-abi3-win_amd64.whl", hash = "sha256:d66ca522517554a883446957539c40dc7b75eb0c2220357fb28bc8940d305339", size = 46995458, upload-time = "2026-03-20T11:14:56.074Z" }, - { url = "https://files.pythonhosted.org/packages/da/76/2d48927e0aa2abbdde08cbf4a2536883b73277d47fbeca95e952de86df34/polars_runtime_32-1.39.3-cp310-abi3-win_arm64.whl", hash = 
"sha256:f49f51461de63f13e5dd4eb080421c8f23f856945f3f8bd5b2b1f59da52c2860", size = 41857648, upload-time = "2026-03-20T11:15:01.142Z" }, -] - [[package]] name = "prompt-toolkit" version = "3.0.52" @@ -4043,56 +3139,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, ] -[[package]] -name = "pyarrow" -version = "23.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/41/8e6b6ef7e225d4ceead8459427a52afdc23379768f54dd3566014d7618c1/pyarrow-23.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6f0147ee9e0386f519c952cc670eb4a8b05caa594eeffe01af0e25f699e4e9bb", size = 34302230, upload-time = "2026-02-16T10:09:03.859Z" }, - { url = "https://files.pythonhosted.org/packages/bf/4a/1472c00392f521fea03ae93408bf445cc7bfa1ab81683faf9bc188e36629/pyarrow-23.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:0ae6e17c828455b6265d590100c295193f93cc5675eb0af59e49dbd00d2de350", size = 35850050, upload-time = "2026-02-16T10:09:11.877Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b2/bd1f2f05ded56af7f54d702c8364c9c43cd6abb91b0e9933f3d77b4f4132/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd", size = 44491918, upload-time = "2026-02-16T10:09:18.144Z" }, - { url = 
"https://files.pythonhosted.org/packages/0b/62/96459ef5b67957eac38a90f541d1c28833d1b367f014a482cb63f3b7cd2d/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:26d50dee49d741ac0e82185033488d28d35be4d763ae6f321f97d1140eb7a0e9", size = 47562811, upload-time = "2026-02-16T10:09:25.792Z" }, - { url = "https://files.pythonhosted.org/packages/7d/94/1170e235add1f5f45a954e26cd0e906e7e74e23392dcb560de471f7366ec/pyarrow-23.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c30143b17161310f151f4a2bcfe41b5ff744238c1039338779424e38579d701", size = 48183766, upload-time = "2026-02-16T10:09:34.645Z" }, - { url = "https://files.pythonhosted.org/packages/0e/2d/39a42af4570377b99774cdb47f63ee6c7da7616bd55b3d5001aa18edfe4f/pyarrow-23.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db2190fa79c80a23fdd29fef4b8992893f024ae7c17d2f5f4db7171fa30c2c78", size = 50607669, upload-time = "2026-02-16T10:09:44.153Z" }, - { url = "https://files.pythonhosted.org/packages/00/ca/db94101c187f3df742133ac837e93b1f269ebdac49427f8310ee40b6a58f/pyarrow-23.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:f00f993a8179e0e1c9713bcc0baf6d6c01326a406a9c23495ec1ba9c9ebf2919", size = 27527698, upload-time = "2026-02-16T10:09:50.263Z" }, - { url = "https://files.pythonhosted.org/packages/9a/4b/4166bb5abbfe6f750fc60ad337c43ecf61340fa52ab386da6e8dbf9e63c4/pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f", size = 34214575, upload-time = "2026-02-16T10:09:56.225Z" }, - { url = "https://files.pythonhosted.org/packages/e1/da/3f941e3734ac8088ea588b53e860baeddac8323ea40ce22e3d0baa865cc9/pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7", size = 35832540, upload-time = "2026-02-16T10:10:03.428Z" }, - { url = 
"https://files.pythonhosted.org/packages/88/7c/3d841c366620e906d54430817531b877ba646310296df42ef697308c2705/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9", size = 44470940, upload-time = "2026-02-16T10:10:10.704Z" }, - { url = "https://files.pythonhosted.org/packages/2c/a5/da83046273d990f256cb79796a190bbf7ec999269705ddc609403f8c6b06/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05", size = 47586063, upload-time = "2026-02-16T10:10:17.95Z" }, - { url = "https://files.pythonhosted.org/packages/5b/3c/b7d2ebcff47a514f47f9da1e74b7949138c58cfeb108cdd4ee62f43f0cf3/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67", size = 48173045, upload-time = "2026-02-16T10:10:25.363Z" }, - { url = "https://files.pythonhosted.org/packages/43/b2/b40961262213beaba6acfc88698eb773dfce32ecdf34d19291db94c2bd73/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730", size = 50621741, upload-time = "2026-02-16T10:10:33.477Z" }, - { url = "https://files.pythonhosted.org/packages/f6/70/1fdda42d65b28b078e93d75d371b2185a61da89dda4def8ba6ba41ebdeb4/pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0", size = 27620678, upload-time = "2026-02-16T10:10:39.31Z" }, - { url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" }, - { url = 
"https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" }, - { url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" }, - { url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" }, - { url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" }, - { url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" }, - { url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749, upload-time = "2026-02-16T10:12:23.297Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" }, - { url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" }, - { url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = "2026-02-16T10:11:47.764Z" }, - { url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" }, - { url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" }, - { url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642, upload-time = "2026-02-16T10:12:17.746Z" }, - { url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" }, - { url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" }, - { url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" }, - { url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" }, - { url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" }, - { url = 
"https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" }, - { url = "https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271, upload-time = "2026-02-16T10:14:09.397Z" }, - { url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" }, - { url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" }, - { url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" }, - { url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" }, - { url = 
"https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" }, - { url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" }, - { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" }, -] - [[package]] name = "pyasn1" version = "0.6.3" @@ -4168,18 +3214,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, ] -[[package]] -name = "pydantic-cli" -version = "10.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3d/45/b383f86c77e9f38360f66253a223f127a74a58aa46e22e52011093f83b3a/pydantic_cli-10.0.0.tar.gz", hash = "sha256:1439d1db73664177c838ca1b90ae8eca19c65ce3b119a79a7b6c6f07cb79874a", size = 34984, upload-time = "2025-10-16T07:00:45.091Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/41/5262fca75b48906b03bd1e156b99330699b59a198b220051128a23917e9a/pydantic_cli-10.0.0-py3-none-any.whl", hash = 
"sha256:e3778aed1e412c9962812af6a11d92ba514df6266bd60835f843b6332dae6eed", size = 43076, upload-time = "2025-10-16T07:00:43.705Z" }, -] - [[package]] name = "pydantic-core" version = "2.41.5" @@ -4291,19 +3325,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, ] -[[package]] -name = "pydeck" -version = "0.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "jinja2", marker = "python_full_version >= '3.12'" }, - { name = "numpy", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605", size = 3832240, upload-time = "2024-05-10T15:36:21.153Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038", size = 6900403, upload-time = "2024-05-10T15:36:17.36Z" }, -] - [[package]] name = "pygments" version = "2.19.2" @@ -4616,110 +3637,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, ] -[[package]] -name = "regex" -version = "2026.2.19" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/ff/c0/d8079d4f6342e4cec5c3e7d7415b5cd3e633d5f4124f7a4626908dbe84c7/regex-2026.2.19.tar.gz", hash = "sha256:6fb8cb09b10e38f3ae17cc6dc04a1df77762bd0351b6ba9041438e7cc85ec310", size = 414973, upload-time = "2026-02-19T19:03:47.899Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/93/43f405a98f54cc59c786efb4fc0b644615ed2392fc89d57d30da11f35b5b/regex-2026.2.19-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:93b16a18cadb938f0f2306267161d57eb33081a861cee9ffcd71e60941eb5dfc", size = 488365, upload-time = "2026-02-19T19:00:17.857Z" }, - { url = "https://files.pythonhosted.org/packages/66/46/da0efce22cd8f5ae28eeb25ac69703f49edcad3331ac22440776f4ea0867/regex-2026.2.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78af1e499cab704131f6f4e2f155b7f54ce396ca2acb6ef21a49507e4752e0be", size = 290737, upload-time = "2026-02-19T19:00:19.869Z" }, - { url = "https://files.pythonhosted.org/packages/fb/19/f735078448132c1c974974d30d5306337bc297fe6b6f126164bff72c1019/regex-2026.2.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb20c11aa4c3793c9ad04c19a972078cdadb261b8429380364be28e867a843f2", size = 288654, upload-time = "2026-02-19T19:00:21.307Z" }, - { url = "https://files.pythonhosted.org/packages/e2/3e/6d7c24a2f423c03ad03e3fbddefa431057186ac1c4cb4fa98b03c7f39808/regex-2026.2.19-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:db5fd91eec71e7b08de10011a2223d0faa20448d4e1380b9daa179fa7bf58906", size = 793785, upload-time = "2026-02-19T19:00:22.926Z" }, - { url = "https://files.pythonhosted.org/packages/67/32/fdb8107504b3122a79bde6705ac1f9d495ed1fe35b87d7cfc1864471999a/regex-2026.2.19-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fdbade8acba71bb45057c2b72f477f0b527c4895f9c83e6cfc30d4a006c21726", size = 860731, upload-time = "2026-02-19T19:00:25.196Z" }, - { url = 
"https://files.pythonhosted.org/packages/9a/fd/cc8c6f05868defd840be6e75919b1c3f462357969ac2c2a0958363b4dc23/regex-2026.2.19-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:31a5f561eb111d6aae14202e7043fb0b406d3c8dddbbb9e60851725c9b38ab1d", size = 907350, upload-time = "2026-02-19T19:00:27.093Z" }, - { url = "https://files.pythonhosted.org/packages/b5/1b/4590db9caa8db3d5a3fe31197c4e42c15aab3643b549ef6a454525fa3a61/regex-2026.2.19-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4584a3ee5f257b71e4b693cc9be3a5104249399f4116fe518c3f79b0c6fc7083", size = 800628, upload-time = "2026-02-19T19:00:29.392Z" }, - { url = "https://files.pythonhosted.org/packages/76/05/513eaa5b96fa579fd0b813e19ec047baaaf573d7374ff010fa139b384bf7/regex-2026.2.19-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:196553ba2a2f47904e5dc272d948a746352e2644005627467e055be19d73b39e", size = 773711, upload-time = "2026-02-19T19:00:30.996Z" }, - { url = "https://files.pythonhosted.org/packages/95/65/5aed06d8c54563d37fea496cf888be504879a3981a7c8e12c24b2c92c209/regex-2026.2.19-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0c10869d18abb759a3317c757746cc913d6324ce128b8bcec99350df10419f18", size = 783186, upload-time = "2026-02-19T19:00:34.598Z" }, - { url = "https://files.pythonhosted.org/packages/2c/57/79a633ad90f2371b4ef9cd72ba3a69a1a67d0cfaab4fe6fa8586d46044ef/regex-2026.2.19-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e689fed279cbe797a6b570bd18ff535b284d057202692c73420cb93cca41aa32", size = 854854, upload-time = "2026-02-19T19:00:37.306Z" }, - { url = "https://files.pythonhosted.org/packages/eb/2d/0f113d477d9e91ec4545ec36c82e58be25038d06788229c91ad52da2b7f5/regex-2026.2.19-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0782bd983f19ac7594039c9277cd6f75c89598c1d72f417e4d30d874105eb0c7", size = 762279, upload-time = "2026-02-19T19:00:39.793Z" }, - { url = 
"https://files.pythonhosted.org/packages/39/cb/237e9fa4f61469fd4f037164dbe8e675a376c88cf73aaaa0aedfd305601c/regex-2026.2.19-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:dbb240c81cfed5d4a67cb86d7676d9f7ec9c3f186310bec37d8a1415210e111e", size = 846172, upload-time = "2026-02-19T19:00:42.134Z" }, - { url = "https://files.pythonhosted.org/packages/ac/7c/104779c5915cc4eb557a33590f8a3f68089269c64287dd769afd76c7ce61/regex-2026.2.19-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80d31c3f1fe7e4c6cd1831cd4478a0609903044dfcdc4660abfe6fb307add7f0", size = 789078, upload-time = "2026-02-19T19:00:43.908Z" }, - { url = "https://files.pythonhosted.org/packages/a8/4a/eae4e88b1317fb2ff57794915e0099198f51e760f6280b320adfa0ad396d/regex-2026.2.19-cp311-cp311-win32.whl", hash = "sha256:66e6a43225ff1064f8926adbafe0922b370d381c3330edaf9891cade52daa790", size = 266013, upload-time = "2026-02-19T19:00:47.274Z" }, - { url = "https://files.pythonhosted.org/packages/f9/29/ba89eb8fae79705e07ad1bd69e568f776159d2a8093c9dbc5303ee618298/regex-2026.2.19-cp311-cp311-win_amd64.whl", hash = "sha256:59a7a5216485a1896c5800e9feb8ff9213e11967b482633b6195d7da11450013", size = 277906, upload-time = "2026-02-19T19:00:49.011Z" }, - { url = "https://files.pythonhosted.org/packages/e3/1a/042d8f04b28e318df92df69d8becb0f42221eb3dd4fe5e976522f4337c76/regex-2026.2.19-cp311-cp311-win_arm64.whl", hash = "sha256:ec661807ffc14c8d14bb0b8c1bb3d5906e476bc96f98b565b709d03962ee4dd4", size = 270463, upload-time = "2026-02-19T19:00:50.988Z" }, - { url = "https://files.pythonhosted.org/packages/b3/73/13b39c7c9356f333e564ab4790b6cb0df125b8e64e8d6474e73da49b1955/regex-2026.2.19-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1665138776e4ac1aa75146669236f7a8a696433ec4e525abf092ca9189247cc", size = 489541, upload-time = "2026-02-19T19:00:52.728Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/77/fcc7bd9a67000d07fbcc11ed226077287a40d5c84544e62171d29d3ef59c/regex-2026.2.19-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d792b84709021945597e05656aac059526df4e0c9ef60a0eaebb306f8fafcaa8", size = 291414, upload-time = "2026-02-19T19:00:54.51Z" }, - { url = "https://files.pythonhosted.org/packages/f9/87/3997fc72dc59233426ef2e18dfdd105bb123812fff740ee9cc348f1a3243/regex-2026.2.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db970bcce4d63b37b3f9eb8c893f0db980bbf1d404a1d8d2b17aa8189de92c53", size = 289140, upload-time = "2026-02-19T19:00:56.841Z" }, - { url = "https://files.pythonhosted.org/packages/f3/d0/b7dd3883ed1cff8ee0c0c9462d828aaf12be63bf5dc55453cbf423523b13/regex-2026.2.19-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03d706fbe7dfec503c8c3cb76f9352b3e3b53b623672aa49f18a251a6c71b8e6", size = 798767, upload-time = "2026-02-19T19:00:59.014Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7e/8e2d09103832891b2b735a2515abf377db21144c6dd5ede1fb03c619bf09/regex-2026.2.19-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dbff048c042beef60aa1848961384572c5afb9e8b290b0f1203a5c42cf5af65", size = 864436, upload-time = "2026-02-19T19:01:00.772Z" }, - { url = "https://files.pythonhosted.org/packages/8a/2e/afea8d23a6db1f67f45e3a0da3057104ce32e154f57dd0c8997274d45fcd/regex-2026.2.19-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccaaf9b907ea6b4223d5cbf5fa5dff5f33dc66f4907a25b967b8a81339a6e332", size = 912391, upload-time = "2026-02-19T19:01:02.865Z" }, - { url = "https://files.pythonhosted.org/packages/59/3c/ea5a4687adaba5e125b9bd6190153d0037325a0ba3757cc1537cc2c8dd90/regex-2026.2.19-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75472631eee7898e16a8a20998d15106cb31cfde21cdf96ab40b432a7082af06", size = 
803702, upload-time = "2026-02-19T19:01:05.298Z" }, - { url = "https://files.pythonhosted.org/packages/dc/c5/624a0705e8473a26488ec1a3a4e0b8763ecfc682a185c302dfec71daea35/regex-2026.2.19-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d89f85a5ccc0cec125c24be75610d433d65295827ebaf0d884cbe56df82d4774", size = 775980, upload-time = "2026-02-19T19:01:07.047Z" }, - { url = "https://files.pythonhosted.org/packages/4d/4b/ed776642533232b5599b7c1f9d817fe11faf597e8a92b7a44b841daaae76/regex-2026.2.19-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0d9f81806abdca3234c3dd582b8a97492e93de3602c8772013cb4affa12d1668", size = 788122, upload-time = "2026-02-19T19:01:08.744Z" }, - { url = "https://files.pythonhosted.org/packages/8c/58/e93e093921d13b9784b4f69896b6e2a9e09580a265c59d9eb95e87d288f2/regex-2026.2.19-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9dadc10d1c2bbb1326e572a226d2ec56474ab8aab26fdb8cf19419b372c349a9", size = 858910, upload-time = "2026-02-19T19:01:10.488Z" }, - { url = "https://files.pythonhosted.org/packages/85/77/ff1d25a0c56cd546e0455cbc93235beb33474899690e6a361fa6b52d265b/regex-2026.2.19-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6bc25d7e15f80c9dc7853cbb490b91c1ec7310808b09d56bd278fe03d776f4f6", size = 764153, upload-time = "2026-02-19T19:01:12.156Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ef/8ec58df26d52d04443b1dc56f9be4b409f43ed5ae6c0248a287f52311fc4/regex-2026.2.19-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:965d59792f5037d9138da6fed50ba943162160443b43d4895b182551805aff9c", size = 850348, upload-time = "2026-02-19T19:01:14.147Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b3/c42fd5ed91639ce5a4225b9df909180fc95586db071f2bf7c68d2ccbfbe6/regex-2026.2.19-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:38d88c6ed4a09ed61403dbdf515d969ccba34669af3961ceb7311ecd0cef504a", size = 789977, upload-time = "2026-02-19T19:01:15.838Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/22/bc3b58ebddbfd6ca5633e71fd41829ee931963aad1ebeec55aad0c23044e/regex-2026.2.19-cp312-cp312-win32.whl", hash = "sha256:5df947cabab4b643d4791af5e28aecf6bf62e6160e525651a12eba3d03755e6b", size = 266381, upload-time = "2026-02-19T19:01:17.952Z" }, - { url = "https://files.pythonhosted.org/packages/fc/4a/6ff550b63e67603ee60e69dc6bd2d5694e85046a558f663b2434bdaeb285/regex-2026.2.19-cp312-cp312-win_amd64.whl", hash = "sha256:4146dc576ea99634ae9c15587d0c43273b4023a10702998edf0fa68ccb60237a", size = 277274, upload-time = "2026-02-19T19:01:19.826Z" }, - { url = "https://files.pythonhosted.org/packages/cc/29/9ec48b679b1e87e7bc8517dff45351eab38f74fbbda1fbcf0e9e6d4e8174/regex-2026.2.19-cp312-cp312-win_arm64.whl", hash = "sha256:cdc0a80f679353bd68450d2a42996090c30b2e15ca90ded6156c31f1a3b63f3b", size = 270509, upload-time = "2026-02-19T19:01:22.075Z" }, - { url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879", size = 489329, upload-time = "2026-02-19T19:01:23.841Z" }, - { url = "https://files.pythonhosted.org/packages/da/aa/78ff4666d3855490bae87845a5983485e765e1f970da20adffa2937b241d/regex-2026.2.19-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3aa0944f1dc6e92f91f3b306ba7f851e1009398c84bfd370633182ee4fc26a64", size = 291308, upload-time = "2026-02-19T19:01:25.605Z" }, - { url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968", size = 289033, upload-time = "2026-02-19T19:01:27.243Z" }, - { url = 
"https://files.pythonhosted.org/packages/75/ec/6438a9344d2869cf5265236a06af1ca6d885e5848b6561e10629bc8e5a11/regex-2026.2.19-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d0e72703c60d68b18b27cde7cdb65ed2570ae29fb37231aa3076bfb6b1d1c13", size = 798798, upload-time = "2026-02-19T19:01:28.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/be/b1ce2d395e3fd2ce5f2fde2522f76cade4297cfe84cd61990ff48308749c/regex-2026.2.19-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:46e69a4bf552e30e74a8aa73f473c87efcb7f6e8c8ece60d9fd7bf13d5c86f02", size = 864444, upload-time = "2026-02-19T19:01:30.933Z" }, - { url = "https://files.pythonhosted.org/packages/d5/97/a3406460c504f7136f140d9461960c25f058b0240e4424d6fb73c7a067ab/regex-2026.2.19-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8edda06079bd770f7f0cf7f3bba1a0b447b96b4a543c91fe0c142d034c166161", size = 912633, upload-time = "2026-02-19T19:01:32.744Z" }, - { url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7", size = 803718, upload-time = "2026-02-19T19:01:34.61Z" }, - { url = "https://files.pythonhosted.org/packages/2f/e5/61d80132690a1ef8dc48e0f44248036877aebf94235d43f63a20d1598888/regex-2026.2.19-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bcf57d30659996ee5c7937999874504c11b5a068edc9515e6a59221cc2744dd1", size = 775975, upload-time = "2026-02-19T19:01:36.525Z" }, - { url = "https://files.pythonhosted.org/packages/05/32/ae828b3b312c972cf228b634447de27237d593d61505e6ad84723f8eabba/regex-2026.2.19-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:8e6e77cd92216eb489e21e5652a11b186afe9bdefca8a2db739fd6b205a9e0a4", size = 788129, upload-time = "2026-02-19T19:01:38.498Z" }, - { url = "https://files.pythonhosted.org/packages/cb/25/d74f34676f22bec401eddf0e5e457296941e10cbb2a49a571ca7a2c16e5a/regex-2026.2.19-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b9ab8dec42afefa6314ea9b31b188259ffdd93f433d77cad454cd0b8d235ce1c", size = 858818, upload-time = "2026-02-19T19:01:40.409Z" }, - { url = "https://files.pythonhosted.org/packages/1e/eb/0bc2b01a6b0b264e1406e5ef11cae3f634c3bd1a6e61206fd3227ce8e89c/regex-2026.2.19-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:294c0fb2e87c6bcc5f577c8f609210f5700b993151913352ed6c6af42f30f95f", size = 764186, upload-time = "2026-02-19T19:01:43.009Z" }, - { url = "https://files.pythonhosted.org/packages/eb/37/5fe5a630d0d99ecf0c3570f8905dafbc160443a2d80181607770086c9812/regex-2026.2.19-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c0924c64b082d4512b923ac016d6e1dcf647a3560b8a4c7e55cbbd13656cb4ed", size = 850363, upload-time = "2026-02-19T19:01:45.015Z" }, - { url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a", size = 790026, upload-time = "2026-02-19T19:01:47.51Z" }, - { url = "https://files.pythonhosted.org/packages/d6/3a/40d3b66923dfc5aeba182f194f0ca35d09afe8c031a193e6ae46971a0a0e/regex-2026.2.19-cp313-cp313-win32.whl", hash = "sha256:43cdde87006271be6963896ed816733b10967baaf0e271d529c82e93da66675b", size = 266372, upload-time = "2026-02-19T19:01:49.469Z" }, - { url = "https://files.pythonhosted.org/packages/3d/f2/39082e8739bfd553497689e74f9d5e5bb531d6f8936d0b94f43e18f219c0/regex-2026.2.19-cp313-cp313-win_amd64.whl", hash = "sha256:127ea69273485348a126ebbf3d6052604d3c7da284f797bba781f364c0947d47", size = 277253, upload-time = "2026-02-19T19:01:51.208Z" }, - { url 
= "https://files.pythonhosted.org/packages/c2/c2/852b9600d53fb47e47080c203e2cdc0ac7e84e37032a57e0eaa37446033a/regex-2026.2.19-cp313-cp313-win_arm64.whl", hash = "sha256:5e56c669535ac59cbf96ca1ece0ef26cb66809990cda4fa45e1e32c3b146599e", size = 270505, upload-time = "2026-02-19T19:01:52.865Z" }, - { url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9", size = 492202, upload-time = "2026-02-19T19:01:54.816Z" }, - { url = "https://files.pythonhosted.org/packages/24/b5/b84fec8cbb5f92a7eed2b6b5353a6a9eed9670fee31817c2da9eb85dc797/regex-2026.2.19-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:80caaa1ddcc942ec7be18427354f9d58a79cee82dea2a6b3d4fd83302e1240d7", size = 292884, upload-time = "2026-02-19T19:01:58.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60", size = 291236, upload-time = "2026-02-19T19:01:59.966Z" }, - { url = "https://files.pythonhosted.org/packages/f2/f7/bda2695134f3e63eb5cccbbf608c2a12aab93d261ff4e2fe49b47fabc948/regex-2026.2.19-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5100acb20648d9efd3f4e7e91f51187f95f22a741dcd719548a6cf4e1b34b3f", size = 807660, upload-time = "2026-02-19T19:02:01.632Z" }, - { url = "https://files.pythonhosted.org/packages/11/56/6e3a4bf5e60d17326b7003d91bbde8938e439256dec211d835597a44972d/regex-2026.2.19-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5e3a31e94d10e52a896adaa3adf3621bd526ad2b45b8c2d23d1bbe74c7423007", size = 873585, upload-time = "2026-02-19T19:02:03.522Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/5e/c90c6aa4d1317cc11839359479cfdd2662608f339e84e81ba751c8a4e461/regex-2026.2.19-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8497421099b981f67c99eba4154cf0dfd8e47159431427a11cfb6487f7791d9e", size = 915243, upload-time = "2026-02-19T19:02:05.608Z" }, - { url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619", size = 812922, upload-time = "2026-02-19T19:02:08.115Z" }, - { url = "https://files.pythonhosted.org/packages/2d/be/9eda82afa425370ffdb3fa9f3ea42450b9ae4da3ff0a4ec20466f69e371b/regex-2026.2.19-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8abe671cf0f15c26b1ad389bf4043b068ce7d3b1c5d9313e12895f57d6738555", size = 781318, upload-time = "2026-02-19T19:02:10.072Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d5/50f0bbe56a8199f60a7b6c714e06e54b76b33d31806a69d0703b23ce2a9e/regex-2026.2.19-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5a8f28dd32a4ce9c41758d43b5b9115c1c497b4b1f50c457602c1d571fa98ce1", size = 795649, upload-time = "2026-02-19T19:02:11.96Z" }, - { url = "https://files.pythonhosted.org/packages/c5/09/d039f081e44a8b0134d0bb2dd805b0ddf390b69d0b58297ae098847c572f/regex-2026.2.19-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:654dc41a5ba9b8cc8432b3f1aa8906d8b45f3e9502442a07c2f27f6c63f85db5", size = 868844, upload-time = "2026-02-19T19:02:14.043Z" }, - { url = "https://files.pythonhosted.org/packages/ef/53/e2903b79a19ec8557fe7cd21cd093956ff2dbc2e0e33969e3adbe5b184dd/regex-2026.2.19-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:4a02faea614e7fdd6ba8b3bec6c8e79529d356b100381cec76e638f45d12ca04", size = 770113, upload-time = "2026-02-19T19:02:16.161Z" }, - { url 
= "https://files.pythonhosted.org/packages/8f/e2/784667767b55714ebb4e59bf106362327476b882c0b2f93c25e84cc99b1a/regex-2026.2.19-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d96162140bb819814428800934c7b71b7bffe81fb6da2d6abc1dcca31741eca3", size = 854922, upload-time = "2026-02-19T19:02:18.155Z" }, - { url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743", size = 799636, upload-time = "2026-02-19T19:02:20.04Z" }, - { url = "https://files.pythonhosted.org/packages/cf/54/fcfc9287f20c5c9bd8db755aafe3e8cf4d99a6a3f1c7162ee182e0ca9374/regex-2026.2.19-cp313-cp313t-win32.whl", hash = "sha256:a178df8ec03011153fbcd2c70cb961bc98cbbd9694b28f706c318bee8927c3db", size = 268968, upload-time = "2026-02-19T19:02:22.816Z" }, - { url = "https://files.pythonhosted.org/packages/1e/a0/ff24c6cb1273e42472706d277147fc38e1f9074a280fb6034b0fc9b69415/regex-2026.2.19-cp313-cp313t-win_amd64.whl", hash = "sha256:2c1693ca6f444d554aa246b592355b5cec030ace5a2729eae1b04ab6e853e768", size = 280390, upload-time = "2026-02-19T19:02:25.231Z" }, - { url = "https://files.pythonhosted.org/packages/1a/b6/a3f6ad89d780ffdeebb4d5e2e3e30bd2ef1f70f6a94d1760e03dd1e12c60/regex-2026.2.19-cp313-cp313t-win_arm64.whl", hash = "sha256:c0761d7ae8d65773e01515ebb0b304df1bf37a0a79546caad9cbe79a42c12af7", size = 271643, upload-time = "2026-02-19T19:02:27.175Z" }, - { url = "https://files.pythonhosted.org/packages/2d/e2/7ad4e76a6dddefc0d64dbe12a4d3ca3947a19ddc501f864a5df2a8222ddd/regex-2026.2.19-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:03d191a9bcf94d31af56d2575210cb0d0c6a054dbcad2ea9e00aa4c42903b919", size = 489306, upload-time = "2026-02-19T19:02:29.058Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/95/ee1736135733afbcf1846c58671046f99c4d5170102a150ebb3dd8d701d9/regex-2026.2.19-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:516ee067c6c721d0d0bfb80a2004edbd060fffd07e456d4e1669e38fe82f922e", size = 291218, upload-time = "2026-02-19T19:02:31.083Z" }, - { url = "https://files.pythonhosted.org/packages/ef/08/180d1826c3d7065200a5168c6b993a44947395c7bb6e04b2c2a219c34225/regex-2026.2.19-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:997862c619994c4a356cb7c3592502cbd50c2ab98da5f61c5c871f10f22de7e5", size = 289097, upload-time = "2026-02-19T19:02:33.485Z" }, - { url = "https://files.pythonhosted.org/packages/28/93/0651924c390c5740f5f896723f8ddd946a6c63083a7d8647231c343912ff/regex-2026.2.19-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b9e1b8a7ebe2807cd7bbdf662510c8e43053a23262b9f46ad4fc2dfc9d204e", size = 799147, upload-time = "2026-02-19T19:02:35.669Z" }, - { url = "https://files.pythonhosted.org/packages/a7/00/2078bd8bcd37d58a756989adbfd9f1d0151b7ca4085a9c2a07e917fbac61/regex-2026.2.19-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6c8fb3b19652e425ff24169dad3ee07f99afa7996caa9dfbb3a9106cd726f49a", size = 865239, upload-time = "2026-02-19T19:02:38.012Z" }, - { url = "https://files.pythonhosted.org/packages/2a/13/75195161ec16936b35a365fa8c1dd2ab29fd910dd2587765062b174d8cfc/regex-2026.2.19-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50f1ee9488dd7a9fda850ec7c68cad7a32fa49fd19733f5403a3f92b451dcf73", size = 911904, upload-time = "2026-02-19T19:02:40.737Z" }, - { url = "https://files.pythonhosted.org/packages/96/72/ac42f6012179343d1c4bd0ffee8c948d841cb32ea188d37e96d80527fcc9/regex-2026.2.19-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab780092b1424d13200aa5a62996e95f65ee3db8509be366437439cdc0af1a9f", size = 
803518, upload-time = "2026-02-19T19:02:42.923Z" }, - { url = "https://files.pythonhosted.org/packages/bc/d1/75a08e2269b007b9783f0f86aa64488e023141219cb5f14dc1e69cda56c6/regex-2026.2.19-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:17648e1a88e72d88641b12635e70e6c71c5136ba14edba29bf8fc6834005a265", size = 775866, upload-time = "2026-02-19T19:02:45.189Z" }, - { url = "https://files.pythonhosted.org/packages/92/41/70e7d05faf6994c2ca7a9fcaa536da8f8e4031d45b0ec04b57040ede201f/regex-2026.2.19-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f914ae8c804c8a8a562fe216100bc156bfb51338c1f8d55fe32cf407774359a", size = 788224, upload-time = "2026-02-19T19:02:47.804Z" }, - { url = "https://files.pythonhosted.org/packages/c8/83/34a2dd601f9deb13c20545c674a55f4a05c90869ab73d985b74d639bac43/regex-2026.2.19-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c7e121a918bbee3f12ac300ce0a0d2f2c979cf208fb071ed8df5a6323281915c", size = 859682, upload-time = "2026-02-19T19:02:50.583Z" }, - { url = "https://files.pythonhosted.org/packages/8e/30/136db9a09a7f222d6e48b806f3730e7af6499a8cad9c72ac0d49d52c746e/regex-2026.2.19-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2fedd459c791da24914ecc474feecd94cf7845efb262ac3134fe27cbd7eda799", size = 764223, upload-time = "2026-02-19T19:02:52.777Z" }, - { url = "https://files.pythonhosted.org/packages/9e/ea/bb947743c78a16df481fa0635c50aa1a439bb80b0e6dc24cd4e49c716679/regex-2026.2.19-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ea8dfc99689240e61fb21b5fc2828f68b90abf7777d057b62d3166b7c1543c4c", size = 850101, upload-time = "2026-02-19T19:02:55.87Z" }, - { url = "https://files.pythonhosted.org/packages/25/27/e3bfe6e97a99f7393665926be02fef772da7f8aa59e50bc3134e4262a032/regex-2026.2.19-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fff45852160960f29e184ec8a5be5ab4063cfd0b168d439d1fc4ac3744bf29e", size = 789904, upload-time = "2026-02-19T19:02:58.523Z" }, - { url = 
"https://files.pythonhosted.org/packages/84/7b/7e2be6f00cea59d08761b027ad237002e90cac74b1607200ebaa2ba3d586/regex-2026.2.19-cp314-cp314-win32.whl", hash = "sha256:5390b130cce14a7d1db226a3896273b7b35be10af35e69f1cca843b6e5d2bb2d", size = 271784, upload-time = "2026-02-19T19:03:00.418Z" }, - { url = "https://files.pythonhosted.org/packages/f7/f6/639911530335773e7ec60bcaa519557b719586024c1d7eaad1daf87b646b/regex-2026.2.19-cp314-cp314-win_amd64.whl", hash = "sha256:e581f75d5c0b15669139ca1c2d3e23a65bb90e3c06ba9d9ea194c377c726a904", size = 280506, upload-time = "2026-02-19T19:03:02.302Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ec/2582b56b4e036d46bb9b5d74a18548439ffa16c11cf59076419174d80f48/regex-2026.2.19-cp314-cp314-win_arm64.whl", hash = "sha256:7187fdee1be0896c1499a991e9bf7c78e4b56b7863e7405d7bb687888ac10c4b", size = 273557, upload-time = "2026-02-19T19:03:04.836Z" }, - { url = "https://files.pythonhosted.org/packages/49/0b/f901cfeb4efd83e4f5c3e9f91a6de77e8e5ceb18555698aca3a27e215ed3/regex-2026.2.19-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:5ec1d7c080832fdd4e150c6f5621fe674c70c63b3ae5a4454cebd7796263b175", size = 492196, upload-time = "2026-02-19T19:03:08.188Z" }, - { url = "https://files.pythonhosted.org/packages/94/0a/349b959e3da874e15eda853755567b4cde7e5309dbb1e07bfe910cfde452/regex-2026.2.19-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8457c1bc10ee9b29cdfd897ccda41dce6bde0e9abd514bcfef7bcd05e254d411", size = 292878, upload-time = "2026-02-19T19:03:10.272Z" }, - { url = "https://files.pythonhosted.org/packages/98/b0/9d81b3c2c5ddff428f8c506713737278979a2c476f6e3675a9c51da0c389/regex-2026.2.19-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cce8027010d1ffa3eb89a0b19621cdc78ae548ea2b49fea1f7bfb3ea77064c2b", size = 291235, upload-time = "2026-02-19T19:03:12.5Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/e7/be7818df8691dbe9508c381ea2cc4c1153e4fdb1c4b06388abeaa93bd712/regex-2026.2.19-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11c138febb40546ff9e026dbbc41dc9fb8b29e61013fa5848ccfe045f5b23b83", size = 807893, upload-time = "2026-02-19T19:03:15.064Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b6/b898a8b983190cfa0276031c17beb73cfd1db07c03c8c37f606d80b655e2/regex-2026.2.19-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:74ff212aa61532246bb3036b3dfea62233414b0154b8bc3676975da78383cac3", size = 873696, upload-time = "2026-02-19T19:03:17.848Z" }, - { url = "https://files.pythonhosted.org/packages/1a/98/126ba671d54f19080ec87cad228fb4f3cc387fff8c4a01cb4e93f4ff9d94/regex-2026.2.19-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d00c95a2b6bfeb3ea1cb68d1751b1dfce2b05adc2a72c488d77a780db06ab867", size = 915493, upload-time = "2026-02-19T19:03:20.343Z" }, - { url = "https://files.pythonhosted.org/packages/b2/10/550c84a1a1a7371867fe8be2bea7df55e797cbca4709974811410e195c5d/regex-2026.2.19-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:311fcccb76af31be4c588d5a17f8f1a059ae8f4b097192896ebffc95612f223a", size = 813094, upload-time = "2026-02-19T19:03:23.287Z" }, - { url = "https://files.pythonhosted.org/packages/29/fb/ba221d2fc76a27b6b7d7a60f73a7a6a7bac21c6ba95616a08be2bcb434b0/regex-2026.2.19-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77cfd6b5e7c4e8bf7a39d243ea05882acf5e3c7002b0ef4756de6606893b0ecd", size = 781583, upload-time = "2026-02-19T19:03:26.872Z" }, - { url = "https://files.pythonhosted.org/packages/26/f1/af79231301297c9e962679efc04a31361b58dc62dec1fc0cb4b8dd95956a/regex-2026.2.19-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:6380f29ff212ec922b6efb56100c089251940e0526a0d05aa7c2d9b571ddf2fe", size = 795875, upload-time = "2026-02-19T19:03:29.223Z" }, - { url = "https://files.pythonhosted.org/packages/a0/90/1e1d76cb0a2d0a4f38a039993e1c5cd971ae50435d751c5bae4f10e1c302/regex-2026.2.19-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:655f553a1fa3ab8a7fd570eca793408b8d26a80bfd89ed24d116baaf13a38969", size = 868916, upload-time = "2026-02-19T19:03:31.415Z" }, - { url = "https://files.pythonhosted.org/packages/9a/67/a1c01da76dbcfed690855a284c665cc0a370e7d02d1bd635cf9ff7dd74b8/regex-2026.2.19-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:015088b8558502f1f0bccd58754835aa154a7a5b0bd9d4c9b7b96ff4ae9ba876", size = 770386, upload-time = "2026-02-19T19:03:33.972Z" }, - { url = "https://files.pythonhosted.org/packages/49/6f/94842bf294f432ff3836bfd91032e2ecabea6d284227f12d1f935318c9c4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9e6693b8567a59459b5dda19104c4a4dbbd4a1c78833eacc758796f2cfef1854", size = 855007, upload-time = "2026-02-19T19:03:36.238Z" }, - { url = "https://files.pythonhosted.org/packages/ff/93/393cd203ca0d1d368f05ce12d2c7e91a324bc93c240db2e6d5ada05835f4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4071209fd4376ab5ceec72ad3507e9d3517c59e38a889079b98916477a871868", size = 799863, upload-time = "2026-02-19T19:03:38.497Z" }, - { url = "https://files.pythonhosted.org/packages/43/d9/35afda99bd92bf1a5831e55a4936d37ea4bed6e34c176a3c2238317faf4f/regex-2026.2.19-cp314-cp314t-win32.whl", hash = "sha256:2905ff4a97fad42f2d0834d8b1ea3c2f856ec209837e458d71a061a7d05f9f01", size = 274742, upload-time = "2026-02-19T19:03:40.804Z" }, - { url = "https://files.pythonhosted.org/packages/ae/42/7edc3344dcc87b698e9755f7f685d463852d481302539dae07135202d3ca/regex-2026.2.19-cp314-cp314t-win_amd64.whl", hash = "sha256:64128549b600987e0f335c2365879895f860a9161f283b14207c800a6ed623d3", size = 284443, upload-time = "2026-02-19T19:03:42.954Z" }, 
- { url = "https://files.pythonhosted.org/packages/3a/45/affdf2d851b42adf3d13fc5b3b059372e9bd299371fd84cf5723c45871fa/regex-2026.2.19-cp314-cp314t-win_arm64.whl", hash = "sha256:a09ae430e94c049dc6957f6baa35ee3418a3a77f3c12b6e02883bd80a2b679b0", size = 274932, upload-time = "2026-02-19T19:03:45.488Z" }, -] - [[package]] name = "requests" version = "2.33.0" @@ -4978,41 +3895,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, ] -[[package]] -name = "safetensors" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, - { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, - { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", 
size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, - { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, - { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, - { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, - { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, 
upload-time = "2025-11-19T15:18:37.211Z" }, - { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, - { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, - { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, - { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, - { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, -] - -[[package]] -name = "sentry-sdk" -version = "2.56.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/de/df/5008954f5466085966468612a7d1638487596ee6d2fd7fb51783a85351bf/sentry_sdk-2.56.0.tar.gz", hash = "sha256:fdab72030b69625665b2eeb9738bdde748ad254e8073085a0ce95382678e8168", 
size = 426820, upload-time = "2026-03-24T09:56:36.575Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cd/1a/b3a3e9f6520493fed7997af4d2de7965d71549c62f994a8fd15f2ecd519e/sentry_sdk-2.56.0-py2.py3-none-any.whl", hash = "sha256:5afafb744ceb91d22f4cc650c6bd048ac6af5f7412dcc6c59305a2e36f4dbc02", size = 451568, upload-time = "2026-03-24T09:56:34.807Z" }, -] - [[package]] name = "setuptools" version = "82.0.1" @@ -5070,15 +3952,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/e1/bb81f93c9f403e3b573c429dd4838ec9b44e4ef35f3b0759eb49557ab6e3/slack_sdk-3.40.1-py2.py3-none-any.whl", hash = "sha256:cd8902252979aa248092b0d77f3a9ea3cc605bc5d53663ad728e892e26e14a65", size = 313687, upload-time = "2026-02-18T22:11:00.027Z" }, ] -[[package]] -name = "smmap" -version = "5.0.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1f/ea/49c993d6dfdd7338c9b1000a0f36817ed7ec84577ae2e52f890d1a4ff909/smmap-5.0.3.tar.gz", hash = "sha256:4d9debb8b99007ae47165abc08670bd74cb74b5227dda7f643eccc4e9eb5642c", size = 22506, upload-time = "2026-03-09T03:43:26.1Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/d4/59e74daffcb57a07668852eeeb6035af9f32cbfd7a1d2511f17d2fe6a738/smmap-5.0.3-py3-none-any.whl", hash = "sha256:c106e05d5a61449cf6ba9a1e650227ecfb141590d2a98412103ff35d89fc7b2f", size = 24390, upload-time = "2026-03-09T03:43:24.361Z" }, -] - [[package]] name = "sniffio" version = "1.3.1" @@ -5113,59 +3986,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4e/39/a61d4b83a7746b70d23d9173be688c0c6bfc7173772344b7442c2c155497/sounddevice-0.5.5-py3-none-win_arm64.whl", hash = "sha256:3861901ddd8230d2e0e8ae62ac320cdd4c688d81df89da036dcb812f757bb3e6", size = 317115, upload-time = "2026-01-23T18:36:42.235Z" }, ] -[[package]] -name = "sqlalchemy" -version = "2.0.48" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "greenlet", marker = 
"(python_full_version >= '3.12' and platform_machine == 'AMD64') or (python_full_version >= '3.12' and platform_machine == 'WIN32') or (python_full_version >= '3.12' and platform_machine == 'aarch64') or (python_full_version >= '3.12' and platform_machine == 'amd64') or (python_full_version >= '3.12' and platform_machine == 'ppc64le') or (python_full_version >= '3.12' and platform_machine == 'win32') or (python_full_version >= '3.12' and platform_machine == 'x86_64')" }, - { name = "typing-extensions", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/73/b4a9737255583b5fa858e0bb8e116eb94b88c910164ed2ed719147bde3de/sqlalchemy-2.0.48.tar.gz", hash = "sha256:5ca74f37f3369b45e1f6b7b06afb182af1fd5dde009e4ffd831830d98cbe5fe7", size = 9886075, upload-time = "2026-03-02T15:28:51.474Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/6d/b8b78b5b80f3c3ab3f7fa90faa195ec3401f6d884b60221260fd4d51864c/sqlalchemy-2.0.48-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b4c575df7368b3b13e0cebf01d4679f9a28ed2ae6c1cd0b1d5beffb6b2007dc", size = 2157184, upload-time = "2026-03-02T15:38:28.161Z" }, - { url = "https://files.pythonhosted.org/packages/21/4b/4f3d4a43743ab58b95b9ddf5580a265b593d017693df9e08bd55780af5bb/sqlalchemy-2.0.48-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e83e3f959aaa1c9df95c22c528096d94848a1bc819f5d0ebf7ee3df0ca63db6c", size = 3313555, upload-time = "2026-03-02T15:58:57.21Z" }, - { url = "https://files.pythonhosted.org/packages/21/dd/3b7c53f1dbbf736fd27041aee68f8ac52226b610f914085b1652c2323442/sqlalchemy-2.0.48-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b7243850edd0b8b97043f04748f31de50cf426e939def5c16bedb540698f7", size = 3313057, upload-time = "2026-03-02T15:52:29.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/d9/cc/3e600a90ae64047f33313d7d32e5ad025417f09d2ded487e8284b5e21a15/sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82745b03b4043e04600a6b665cb98697c4339b24e34d74b0a2ac0a2488b6f94d", size = 3265431, upload-time = "2026-03-02T15:58:59.096Z" }, - { url = "https://files.pythonhosted.org/packages/8b/19/780138dacfe3f5024f4cf96e4005e91edf6653d53d3673be4844578faf1d/sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5e088bf43f6ee6fec7dbf1ef7ff7774a616c236b5c0cb3e00662dd71a56b571", size = 3287646, upload-time = "2026-03-02T15:52:31.569Z" }, - { url = "https://files.pythonhosted.org/packages/40/fd/f32ced124f01a23151f4777e4c705f3a470adc7bd241d9f36a7c941a33bf/sqlalchemy-2.0.48-cp311-cp311-win32.whl", hash = "sha256:9c7d0a77e36b5f4b01ca398482230ab792061d243d715299b44a0b55c89fe617", size = 2116956, upload-time = "2026-03-02T15:46:54.535Z" }, - { url = "https://files.pythonhosted.org/packages/58/d5/dd767277f6feef12d05651538f280277e661698f617fa4d086cce6055416/sqlalchemy-2.0.48-cp311-cp311-win_amd64.whl", hash = "sha256:583849c743e0e3c9bb7446f5b5addeacedc168d657a69b418063dfdb2d90081c", size = 2141627, upload-time = "2026-03-02T15:46:55.849Z" }, - { url = "https://files.pythonhosted.org/packages/ef/91/a42ae716f8925e9659df2da21ba941f158686856107a61cc97a95e7647a3/sqlalchemy-2.0.48-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:348174f228b99f33ca1f773e85510e08927620caa59ffe7803b37170df30332b", size = 2155737, upload-time = "2026-03-02T15:49:13.207Z" }, - { url = "https://files.pythonhosted.org/packages/b9/52/f75f516a1f3888f027c1cfb5d22d4376f4b46236f2e8669dcb0cddc60275/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53667b5f668991e279d21f94ccfa6e45b4e3f4500e7591ae59a8012d0f010dcb", size = 3337020, upload-time = "2026-03-02T15:50:34.547Z" }, - { url = 
"https://files.pythonhosted.org/packages/37/9a/0c28b6371e0cdcb14f8f1930778cb3123acfcbd2c95bb9cf6b4a2ba0cce3/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34634e196f620c7a61d18d5cf7dc841ca6daa7961aed75d532b7e58b309ac894", size = 3349983, upload-time = "2026-03-02T15:53:25.542Z" }, - { url = "https://files.pythonhosted.org/packages/1c/46/0aee8f3ff20b1dcbceb46ca2d87fcc3d48b407925a383ff668218509d132/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:546572a1793cc35857a2ffa1fe0e58571af1779bcc1ffa7c9fb0839885ed69a9", size = 3279690, upload-time = "2026-03-02T15:50:36.277Z" }, - { url = "https://files.pythonhosted.org/packages/ce/8c/a957bc91293b49181350bfd55e6dfc6e30b7f7d83dc6792d72043274a390/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07edba08061bc277bfdc772dd2a1a43978f5a45994dd3ede26391b405c15221e", size = 3314738, upload-time = "2026-03-02T15:53:27.519Z" }, - { url = "https://files.pythonhosted.org/packages/4b/44/1d257d9f9556661e7bdc83667cc414ba210acfc110c82938cb3611eea58f/sqlalchemy-2.0.48-cp312-cp312-win32.whl", hash = "sha256:908a3fa6908716f803b86896a09a2c4dde5f5ce2bb07aacc71ffebb57986ce99", size = 2115546, upload-time = "2026-03-02T15:54:31.591Z" }, - { url = "https://files.pythonhosted.org/packages/f2/af/c3c7e1f3a2b383155a16454df62ae8c62a30dd238e42e68c24cebebbfae6/sqlalchemy-2.0.48-cp312-cp312-win_amd64.whl", hash = "sha256:68549c403f79a8e25984376480959975212a670405e3913830614432b5daa07a", size = 2142484, upload-time = "2026-03-02T15:54:34.072Z" }, - { url = "https://files.pythonhosted.org/packages/d1/c6/569dc8bf3cd375abc5907e82235923e986799f301cd79a903f784b996fca/sqlalchemy-2.0.48-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e3070c03701037aa418b55d36532ecb8f8446ed0135acb71c678dbdf12f5b6e4", size = 2152599, upload-time = "2026-03-02T15:49:14.41Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/ff/f4e04a4bd5a24304f38cb0d4aa2ad4c0fb34999f8b884c656535e1b2b74c/sqlalchemy-2.0.48-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2645b7d8a738763b664a12a1542c89c940daa55196e8d73e55b169cc5c99f65f", size = 3278825, upload-time = "2026-03-02T15:50:38.269Z" }, - { url = "https://files.pythonhosted.org/packages/fe/88/cb59509e4668d8001818d7355d9995be90c321313078c912420603a7cb95/sqlalchemy-2.0.48-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b19151e76620a412c2ac1c6f977ab1b9fa7ad43140178345136456d5265b32ed", size = 3295200, upload-time = "2026-03-02T15:53:29.366Z" }, - { url = "https://files.pythonhosted.org/packages/87/dc/1609a4442aefd750ea2f32629559394ec92e89ac1d621a7f462b70f736ff/sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b193a7e29fd9fa56e502920dca47dffe60f97c863494946bd698c6058a55658", size = 3226876, upload-time = "2026-03-02T15:50:39.802Z" }, - { url = "https://files.pythonhosted.org/packages/37/c3/6ae2ab5ea2fa989fbac4e674de01224b7a9d744becaf59bb967d62e99bed/sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:36ac4ddc3d33e852da9cb00ffb08cea62ca05c39711dc67062ca2bb1fae35fd8", size = 3265045, upload-time = "2026-03-02T15:53:31.421Z" }, - { url = "https://files.pythonhosted.org/packages/6f/82/ea4665d1bb98c50c19666e672f21b81356bd6077c4574e3d2bbb84541f53/sqlalchemy-2.0.48-cp313-cp313-win32.whl", hash = "sha256:389b984139278f97757ea9b08993e7b9d1142912e046ab7d82b3fbaeb0209131", size = 2113700, upload-time = "2026-03-02T15:54:35.825Z" }, - { url = "https://files.pythonhosted.org/packages/b7/2b/b9040bec58c58225f073f5b0c1870defe1940835549dafec680cbd58c3c3/sqlalchemy-2.0.48-cp313-cp313-win_amd64.whl", hash = "sha256:d612c976cbc2d17edfcc4c006874b764e85e990c29ce9bd411f926bbfb02b9a2", size = 2139487, upload-time = "2026-03-02T15:54:37.079Z" }, - { url = 
"https://files.pythonhosted.org/packages/f4/f4/7b17bd50244b78a49d22cc63c969d71dc4de54567dc152a9b46f6fae40ce/sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69f5bc24904d3bc3640961cddd2523e361257ef68585d6e364166dfbe8c78fae", size = 3558851, upload-time = "2026-03-02T15:57:48.607Z" }, - { url = "https://files.pythonhosted.org/packages/20/0d/213668e9aca61d370f7d2a6449ea4ec699747fac67d4bda1bb3d129025be/sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd08b90d211c086181caed76931ecfa2bdfc83eea3cfccdb0f82abc6c4b876cb", size = 3525525, upload-time = "2026-03-02T16:04:38.058Z" }, - { url = "https://files.pythonhosted.org/packages/85/d7/a84edf412979e7d59c69b89a5871f90a49228360594680e667cb2c46a828/sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1ccd42229aaac2df431562117ac7e667d702e8e44afdb6cf0e50fa3f18160f0b", size = 3466611, upload-time = "2026-03-02T15:57:50.759Z" }, - { url = "https://files.pythonhosted.org/packages/86/55/42404ce5770f6be26a2b0607e7866c31b9a4176c819e9a7a5e0a055770be/sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0dcbc588cd5b725162c076eb9119342f6579c7f7f55057bb7e3c6ff27e13121", size = 3475812, upload-time = "2026-03-02T16:04:40.092Z" }, - { url = "https://files.pythonhosted.org/packages/ae/ae/29b87775fadc43e627cf582fe3bda4d02e300f6b8f2747c764950d13784c/sqlalchemy-2.0.48-cp313-cp313t-win32.whl", hash = "sha256:9764014ef5e58aab76220c5664abb5d47d5bc858d9debf821e55cfdd0f128485", size = 2141335, upload-time = "2026-03-02T15:52:51.518Z" }, - { url = "https://files.pythonhosted.org/packages/91/44/f39d063c90f2443e5b46ec4819abd3d8de653893aae92df42a5c4f5843de/sqlalchemy-2.0.48-cp313-cp313t-win_amd64.whl", hash = "sha256:e2f35b4cccd9ed286ad62e0a3c3ac21e06c02abc60e20aa51a3e305a30f5fa79", size = 2173095, upload-time = "2026-03-02T15:52:52.79Z" }, - { url = 
"https://files.pythonhosted.org/packages/f7/b3/f437eaa1cf028bb3c927172c7272366393e73ccd104dcf5b6963f4ab5318/sqlalchemy-2.0.48-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e2d0d88686e3d35a76f3e15a34e8c12d73fc94c1dea1cd55782e695cc14086dd", size = 2154401, upload-time = "2026-03-02T15:49:17.24Z" }, - { url = "https://files.pythonhosted.org/packages/6c/1c/b3abdf0f402aa3f60f0df6ea53d92a162b458fca2321d8f1f00278506402/sqlalchemy-2.0.48-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49b7bddc1eebf011ea5ab722fdbe67a401caa34a350d278cc7733c0e88fecb1f", size = 3274528, upload-time = "2026-03-02T15:50:41.489Z" }, - { url = "https://files.pythonhosted.org/packages/f2/5e/327428a034407651a048f5e624361adf3f9fbac9d0fa98e981e9c6ff2f5e/sqlalchemy-2.0.48-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:426c5ca86415d9b8945c7073597e10de9644802e2ff502b8e1f11a7a2642856b", size = 3279523, upload-time = "2026-03-02T15:53:32.962Z" }, - { url = "https://files.pythonhosted.org/packages/2a/ca/ece73c81a918add0965b76b868b7b5359e068380b90ef1656ee995940c02/sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:288937433bd44e3990e7da2402fabc44a3c6c25d3704da066b85b89a85474ae0", size = 3224312, upload-time = "2026-03-02T15:50:42.996Z" }, - { url = "https://files.pythonhosted.org/packages/88/11/fbaf1ae91fa4ee43f4fe79661cead6358644824419c26adb004941bdce7c/sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8183dc57ae7d9edc1346e007e840a9f3d6aa7b7f165203a99e16f447150140d2", size = 3246304, upload-time = "2026-03-02T15:53:34.937Z" }, - { url = "https://files.pythonhosted.org/packages/fa/a8/5fb0deb13930b4f2f698c5541ae076c18981173e27dd00376dbaea7a9c82/sqlalchemy-2.0.48-cp314-cp314-win32.whl", hash = "sha256:1182437cb2d97988cfea04cf6cdc0b0bb9c74f4d56ec3d08b81e23d621a28cc6", size = 2116565, upload-time = "2026-03-02T15:54:38.321Z" }, - { url = 
"https://files.pythonhosted.org/packages/95/7e/e83615cb63f80047f18e61e31e8e32257d39458426c23006deeaf48f463b/sqlalchemy-2.0.48-cp314-cp314-win_amd64.whl", hash = "sha256:144921da96c08feb9e2b052c5c5c1d0d151a292c6135623c6b2c041f2a45f9e0", size = 2142205, upload-time = "2026-03-02T15:54:39.831Z" }, - { url = "https://files.pythonhosted.org/packages/83/e3/69d8711b3f2c5135e9cde5f063bc1605860f0b2c53086d40c04017eb1f77/sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5aee45fd2c6c0f2b9cdddf48c48535e7471e42d6fb81adfde801da0bd5b93241", size = 3563519, upload-time = "2026-03-02T15:57:52.387Z" }, - { url = "https://files.pythonhosted.org/packages/f8/4f/a7cce98facca73c149ea4578981594aaa5fd841e956834931de503359336/sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cddca31edf8b0653090cbb54562ca027c421c58ddde2c0685f49ff56a1690e0", size = 3528611, upload-time = "2026-03-02T16:04:42.097Z" }, - { url = "https://files.pythonhosted.org/packages/cd/7d/5936c7a03a0b0cb0fa0cc425998821c6029756b0855a8f7ee70fba1de955/sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7a936f1bb23d370b7c8cc079d5fce4c7d18da87a33c6744e51a93b0f9e97e9b3", size = 3472326, upload-time = "2026-03-02T15:57:54.423Z" }, - { url = "https://files.pythonhosted.org/packages/f4/33/cea7dfc31b52904efe3dcdc169eb4514078887dff1f5ae28a7f4c5d54b3c/sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e004aa9248e8cb0a5f9b96d003ca7c1c0a5da8decd1066e7b53f59eb8ce7c62b", size = 3478453, upload-time = "2026-03-02T16:04:44.584Z" }, - { url = "https://files.pythonhosted.org/packages/c8/95/32107c4d13be077a9cae61e9ae49966a35dc4bf442a8852dd871db31f62e/sqlalchemy-2.0.48-cp314-cp314t-win32.whl", hash = "sha256:b8438ec5594980d405251451c5b7ea9aa58dda38eb7ac35fb7e4c696712ee24f", size = 2147209, upload-time = "2026-03-02T15:52:54.274Z" }, - { url = 
"https://files.pythonhosted.org/packages/d2/d7/1e073da7a4bc645eb83c76067284a0374e643bc4be57f14cc6414656f92c/sqlalchemy-2.0.48-cp314-cp314t-win_amd64.whl", hash = "sha256:d854b3970067297f3a7fbd7a4683587134aa9b3877ee15aa29eea478dc68f933", size = 2182198, upload-time = "2026-03-02T15:52:55.606Z" }, - { url = "https://files.pythonhosted.org/packages/46/2c/9664130905f03db57961b8980b05cab624afd114bf2be2576628a9f22da4/sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096", size = 1940202, upload-time = "2026-03-02T15:52:43.285Z" }, -] - [[package]] name = "sse-starlette" version = "3.3.2" @@ -5192,35 +4012,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] -[[package]] -name = "streamlit" -version = "1.55.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "altair", marker = "python_full_version >= '3.12'" }, - { name = "blinker", marker = "python_full_version >= '3.12'" }, - { name = "cachetools", marker = "python_full_version >= '3.12'" }, - { name = "click", marker = "python_full_version >= '3.12'" }, - { name = "gitpython", marker = "python_full_version >= '3.12'" }, - { name = "numpy", marker = "python_full_version >= '3.12'" }, - { name = "packaging", marker = "python_full_version >= '3.12'" }, - { name = "pandas", marker = "python_full_version >= '3.12'" }, - { name = "pillow", marker = "python_full_version >= '3.12'" }, - { name = "protobuf", marker = "python_full_version >= '3.12'" }, - { name = "pyarrow", marker = "python_full_version >= '3.12'" }, - { name = "pydeck", marker = "python_full_version >= '3.12'" }, - { name = "requests", marker = "python_full_version >= '3.12'" }, - { name = "tenacity", marker = 
"python_full_version >= '3.12'" }, - { name = "toml", marker = "python_full_version >= '3.12'" }, - { name = "tornado", marker = "python_full_version >= '3.12'" }, - { name = "typing-extensions", marker = "python_full_version >= '3.12'" }, - { name = "watchdog", marker = "python_full_version >= '3.12' and sys_platform != 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/92/8e/f2b8b4fa8ba65aae251170c54f8ce198fb588fc348301c2b624f8c63efac/streamlit-1.55.0.tar.gz", hash = "sha256:015e512bbd02d000f4047e51118dc086b70e7d9c46b4a11a33c2509731379626", size = 8612008, upload-time = "2026-03-03T22:26:02.149Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dc/e6/412c1e1f200ca8c32ecf10201839183e261ad61ced3ede34a66f6d4be3cf/streamlit-1.55.0-py3-none-any.whl", hash = "sha256:1e4a16449c6131696180f4ddb40ea8c51834e89c2a43e1b0362bc9b1cfd9b415", size = 9075714, upload-time = "2026-03-03T22:25:59.126Z" }, -] - [[package]] name = "sympy" version = "1.14.0" @@ -5272,77 +4063,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/d1/8bb87d21e9aeb323cc03034f5eaf2c8f69841e40e4853c2627edf8111ed3/termcolor-3.3.0-py3-none-any.whl", hash = "sha256:cf642efadaf0a8ebbbf4bc7a31cec2f9b5f21a9f726f4ccbb08192c9c26f43a5", size = 7734, upload-time = "2025-12-29T12:55:20.718Z" }, ] -[[package]] -name = "tiktoken" -version = "0.12.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "regex", marker = "python_full_version >= '3.12'" }, - { name = "requests", marker = "python_full_version >= '3.12'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, - { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, - { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, - { url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444, upload-time = "2025-10-06T20:21:48.139Z" }, - { url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080, upload-time = "2025-10-06T20:21:49.246Z" }, - { url = "https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240, upload-time = "2025-10-06T20:21:50.274Z" }, - { url = 
"https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422, upload-time = "2025-10-06T20:21:51.734Z" }, - { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, - { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, - { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, - { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, - { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, - { url = 
"https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, - { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, - { url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802, upload-time = "2025-10-06T20:22:00.96Z" }, - { url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995, upload-time = "2025-10-06T20:22:02.788Z" }, - { url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948, upload-time = "2025-10-06T20:22:03.814Z" }, - { url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986, upload-time = "2025-10-06T20:22:05.173Z" }, - { url = 
"https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222, upload-time = "2025-10-06T20:22:06.265Z" }, - { url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097, upload-time = "2025-10-06T20:22:07.403Z" }, - { url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117, upload-time = "2025-10-06T20:22:08.418Z" }, - { url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309, upload-time = "2025-10-06T20:22:10.939Z" }, - { url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712, upload-time = "2025-10-06T20:22:12.115Z" }, - { url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725, upload-time = "2025-10-06T20:22:13.541Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875, upload-time = "2025-10-06T20:22:14.559Z" }, - { url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451, upload-time = "2025-10-06T20:22:15.545Z" }, - { url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794, upload-time = "2025-10-06T20:22:16.624Z" }, - { url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777, upload-time = "2025-10-06T20:22:18.036Z" }, - { url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188, upload-time = "2025-10-06T20:22:19.563Z" }, - { url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978, upload-time = "2025-10-06T20:22:20.702Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271, upload-time = "2025-10-06T20:22:22.06Z" }, - { url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216, upload-time = "2025-10-06T20:22:23.085Z" }, - { url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860, upload-time = "2025-10-06T20:22:24.602Z" }, - { url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567, upload-time = "2025-10-06T20:22:25.671Z" }, - { url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067, upload-time = "2025-10-06T20:22:26.753Z" }, - { url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473, upload-time = "2025-10-06T20:22:27.775Z" }, - { url = 
"https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855, upload-time = "2025-10-06T20:22:28.799Z" }, - { url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022, upload-time = "2025-10-06T20:22:29.981Z" }, - { url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736, upload-time = "2025-10-06T20:22:30.996Z" }, - { url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908, upload-time = "2025-10-06T20:22:32.073Z" }, - { url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706, upload-time = "2025-10-06T20:22:33.385Z" }, - { url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667, upload-time = "2025-10-06T20:22:34.444Z" }, -] - -[[package]] -name = "tinker" -version = "0.18.0" -source = { git = 
"https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b#30517b667f18a3dfb7ef33fb56cf686d5820ba2b" } -dependencies = [ - { name = "anyio" }, - { name = "click" }, - { name = "distro" }, - { name = "httpx", extra = ["http2"] }, - { name = "numpy" }, - { name = "pydantic" }, - { name = "rich" }, - { name = "sniffio" }, - { name = "transformers" }, - { name = "typing-extensions" }, -] - [[package]] name = "tokenizers" version = "0.22.2" @@ -5407,26 +4127,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] -[[package]] -name = "transformers" -version = "5.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "huggingface-hub" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "regex" }, - { name = "safetensors" }, - { name = "tokenizers" }, - { name = "tqdm" }, - { name = "typer" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" }, -] - [[package]] name = "ty" version = "0.0.21" @@ -5660,53 +4360,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/65/3a/0137d5b157845e1d41a70130d8dce8ba15d8712f34619693cda04ecb8f02/vercel_workers-0.0.16-py3-none-any.whl", 
hash = "sha256:542be839e46e236a68cc308695ccc3c970d76de72c978d7f416cc6ce09688896", size = 50141, upload-time = "2026-04-13T21:23:28.652Z" }, ] -[[package]] -name = "wandb" -version = "0.25.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "gitpython" }, - { name = "packaging" }, - { name = "platformdirs" }, - { name = "protobuf" }, - { name = "pydantic" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sentry-sdk" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/60/bb/eb579bf9abac70934a014a9d4e45346aab307994f3021d201bebe5fa25ec/wandb-0.25.1.tar.gz", hash = "sha256:b2a95cd777ecbe7499599a43158834983448a0048329bc7210ef46ca18d21994", size = 43983308, upload-time = "2026-03-10T23:51:44.227Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/d8/873553b6818499d1b1de314067d528b892897baf0dc81fedc0e845abc2dd/wandb-0.25.1-py3-none-macosx_12_0_arm64.whl", hash = "sha256:9bb0679a3e2dcd96db9d9b6d3e17d046241d8d122974b24facb85cc93309a8c9", size = 23615900, upload-time = "2026-03-10T23:51:06.278Z" }, - { url = "https://files.pythonhosted.org/packages/71/ea/b131f319aaa5d0bf7572b6bfcff3dd89e1cf92b17eee443bbab71d12d74c/wandb-0.25.1-py3-none-macosx_12_0_x86_64.whl", hash = "sha256:0fb13ed18914027523e7b4fc20380c520e0d10da0ee452f924a13f84509fbe12", size = 25576144, upload-time = "2026-03-10T23:51:11.527Z" }, - { url = "https://files.pythonhosted.org/packages/70/5f/81508581f0bb77b0495665c1c78e77606a48e66e855ca71ba7c8ae29efa4/wandb-0.25.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:cc4521eb5223429ddab5e8eee9b42fdf4caabdf0bc4e0e809042720e5fbef0ed", size = 23070425, upload-time = "2026-03-10T23:51:15.71Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c7/445155ef010e2e35d190797d7c36ff441e062a5b566a6da4778e22233395/wandb-0.25.1-py3-none-manylinux_2_28_x86_64.whl", hash = 
"sha256:e73b4c55b947edae349232d5845204d30fac88e18eb4ad1d4b96bf7cf898405a", size = 25628142, upload-time = "2026-03-10T23:51:19.326Z" }, - { url = "https://files.pythonhosted.org/packages/d5/63/f5c55ee00cf481ef1ccd3c385a0585ad52e7840d08419d4f82ddbeeea959/wandb-0.25.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:22b84065aa398e1624d2e5ad79e08bc4d2af41a6db61697b03b3aaba332977c6", size = 23123172, upload-time = "2026-03-10T23:51:23.418Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d9/19eb7974c0e9253bcbaee655222c0f0e1a52e63e9479ee711b4208f8ac31/wandb-0.25.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:005c4c6b5126ef8f4b4110e5372d950918b00637d6dc4b615ad17445f9739478", size = 25714479, upload-time = "2026-03-10T23:51:27.421Z" }, - { url = "https://files.pythonhosted.org/packages/11/19/466c1d03323a4a0ed7d4036a59b18d6b6f67cb5032e444205927e226b18d/wandb-0.25.1-py3-none-win32.whl", hash = "sha256:8f2d04f16b88d65bfba9d79fb945f6c64e2686215469a841936e0972be8ec6a5", size = 24967338, upload-time = "2026-03-10T23:51:31.833Z" }, - { url = "https://files.pythonhosted.org/packages/89/22/680d34c1587f3a979c701b66d71aa7c42b4ef2fdf0774f67034e618e834e/wandb-0.25.1-py3-none-win_amd64.whl", hash = "sha256:62db5166de14456156d7a85953a58733a631228e6d4248a753605f75f75fb845", size = 24967343, upload-time = "2026-03-10T23:51:36.026Z" }, - { url = "https://files.pythonhosted.org/packages/c4/e8/76836b75d401ff5912aaf513176e64557ceaec4c4946bfd38a698ff84d48/wandb-0.25.1-py3-none-win_arm64.whl", hash = "sha256:cc7c34b70cf4b7be4d395541e82e325fd9d2be978d62c9ec01f1a7141523b6bb", size = 22080774, upload-time = "2026-03-10T23:51:40.196Z" }, -] - -[[package]] -name = "watchdog" -version = "6.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 
131220, upload-time = "2024-11-01T14:07:13.037Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079, upload-time = "2024-11-01T14:06:59.472Z" }, - { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078, upload-time = "2024-11-01T14:07:01.431Z" }, - { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076, upload-time = "2024-11-01T14:07:02.568Z" }, - { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077, upload-time = "2024-11-01T14:07:03.893Z" }, - { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078, upload-time = "2024-11-01T14:07:05.189Z" }, - { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077, upload-time = "2024-11-01T14:07:06.376Z" }, - { url = 
"https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078, upload-time = "2024-11-01T14:07:07.547Z" }, - { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065, upload-time = "2024-11-01T14:07:09.525Z" }, - { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070, upload-time = "2024-11-01T14:07:10.686Z" }, - { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, -] - [[package]] name = "watchfiles" version = "1.1.1" @@ -5904,109 +4557,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, ] -[[package]] -name = "xxhash" -version = "3.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/17/d4/cc2f0400e9154df4b9964249da78ebd72f318e35ccc425e9f403c392f22a/xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a", size = 32844, upload-time = "2025-10-02T14:34:14.037Z" }, - { url = "https://files.pythonhosted.org/packages/5e/ec/1cc11cd13e26ea8bc3cb4af4eaadd8d46d5014aebb67be3f71fb0b68802a/xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa", size = 30809, upload-time = "2025-10-02T14:34:15.484Z" }, - { url = "https://files.pythonhosted.org/packages/04/5f/19fe357ea348d98ca22f456f75a30ac0916b51c753e1f8b2e0e6fb884cce/xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248", size = 194665, upload-time = "2025-10-02T14:34:16.541Z" }, - { url = "https://files.pythonhosted.org/packages/90/3b/d1f1a8f5442a5fd8beedae110c5af7604dc37349a8e16519c13c19a9a2de/xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62", size = 213550, upload-time = "2025-10-02T14:34:17.878Z" }, - { url = "https://files.pythonhosted.org/packages/c4/ef/3a9b05eb527457d5db13a135a2ae1a26c80fecd624d20f3e8dcc4cb170f3/xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f", size = 212384, upload-time = "2025-10-02T14:34:19.182Z" }, - { url = "https://files.pythonhosted.org/packages/0f/18/ccc194ee698c6c623acbf0f8c2969811a8a4b6185af5e824cd27b9e4fd3e/xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e", size = 445749, upload-time = 
"2025-10-02T14:34:20.659Z" }, - { url = "https://files.pythonhosted.org/packages/a5/86/cf2c0321dc3940a7aa73076f4fd677a0fb3e405cb297ead7d864fd90847e/xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8", size = 193880, upload-time = "2025-10-02T14:34:22.431Z" }, - { url = "https://files.pythonhosted.org/packages/82/fb/96213c8560e6f948a1ecc9a7613f8032b19ee45f747f4fca4eb31bb6d6ed/xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0", size = 210912, upload-time = "2025-10-02T14:34:23.937Z" }, - { url = "https://files.pythonhosted.org/packages/40/aa/4395e669b0606a096d6788f40dbdf2b819d6773aa290c19e6e83cbfc312f/xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77", size = 198654, upload-time = "2025-10-02T14:34:25.644Z" }, - { url = "https://files.pythonhosted.org/packages/67/74/b044fcd6b3d89e9b1b665924d85d3f400636c23590226feb1eb09e1176ce/xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c", size = 210867, upload-time = "2025-10-02T14:34:27.203Z" }, - { url = "https://files.pythonhosted.org/packages/bc/fd/3ce73bf753b08cb19daee1eb14aa0d7fe331f8da9c02dd95316ddfe5275e/xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b", size = 414012, upload-time = "2025-10-02T14:34:28.409Z" }, - { url = "https://files.pythonhosted.org/packages/ba/b3/5a4241309217c5c876f156b10778f3ab3af7ba7e3259e6d5f5c7d0129eb2/xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3", size = 191409, upload-time = "2025-10-02T14:34:29.696Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/01/99bfbc15fb9abb9a72b088c1d95219fc4782b7d01fc835bd5744d66dd0b8/xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd", size = 30574, upload-time = "2025-10-02T14:34:31.028Z" }, - { url = "https://files.pythonhosted.org/packages/65/79/9d24d7f53819fe301b231044ea362ce64e86c74f6e8c8e51320de248b3e5/xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef", size = 31481, upload-time = "2025-10-02T14:34:32.062Z" }, - { url = "https://files.pythonhosted.org/packages/30/4e/15cd0e3e8772071344eab2961ce83f6e485111fed8beb491a3f1ce100270/xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7", size = 27861, upload-time = "2025-10-02T14:34:33.555Z" }, - { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" }, - { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" }, - { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" }, - { url = 
"https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload-time = "2025-10-02T14:34:38.6Z" }, - { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" }, - { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" }, - { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" }, - { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload-time = "2025-10-02T14:34:45.042Z" }, - { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload-time 
= "2025-10-02T14:34:46.302Z" }, - { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" }, - { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" }, - { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" }, - { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload-time = "2025-10-02T14:34:51.954Z" }, - { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" }, - { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload-time = "2025-10-02T14:34:54.371Z" }, - { url = 
"https://files.pythonhosted.org/packages/33/76/35d05267ac82f53ae9b0e554da7c5e281ee61f3cad44c743f0fcd354f211/xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec", size = 32738, upload-time = "2025-10-02T14:34:55.839Z" }, - { url = "https://files.pythonhosted.org/packages/31/a8/3fbce1cd96534a95e35d5120637bf29b0d7f5d8fa2f6374e31b4156dd419/xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1", size = 30821, upload-time = "2025-10-02T14:34:57.219Z" }, - { url = "https://files.pythonhosted.org/packages/0c/ea/d387530ca7ecfa183cb358027f1833297c6ac6098223fd14f9782cd0015c/xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6", size = 194127, upload-time = "2025-10-02T14:34:59.21Z" }, - { url = "https://files.pythonhosted.org/packages/ba/0c/71435dcb99874b09a43b8d7c54071e600a7481e42b3e3ce1eb5226a5711a/xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263", size = 212975, upload-time = "2025-10-02T14:35:00.816Z" }, - { url = "https://files.pythonhosted.org/packages/84/7a/c2b3d071e4bb4a90b7057228a99b10d51744878f4a8a6dd643c8bd897620/xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546", size = 212241, upload-time = "2025-10-02T14:35:02.207Z" }, - { url = "https://files.pythonhosted.org/packages/81/5f/640b6eac0128e215f177df99eadcd0f1b7c42c274ab6a394a05059694c5a/xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89", size = 445471, upload-time = 
"2025-10-02T14:35:03.61Z" }, - { url = "https://files.pythonhosted.org/packages/5e/1e/3c3d3ef071b051cc3abbe3721ffb8365033a172613c04af2da89d5548a87/xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d", size = 193936, upload-time = "2025-10-02T14:35:05.013Z" }, - { url = "https://files.pythonhosted.org/packages/2c/bd/4a5f68381939219abfe1c22a9e3a5854a4f6f6f3c4983a87d255f21f2e5d/xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7", size = 210440, upload-time = "2025-10-02T14:35:06.239Z" }, - { url = "https://files.pythonhosted.org/packages/eb/37/b80fe3d5cfb9faff01a02121a0f4d565eb7237e9e5fc66e73017e74dcd36/xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db", size = 197990, upload-time = "2025-10-02T14:35:07.735Z" }, - { url = "https://files.pythonhosted.org/packages/d7/fd/2c0a00c97b9e18f72e1f240ad4e8f8a90fd9d408289ba9c7c495ed7dc05c/xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42", size = 210689, upload-time = "2025-10-02T14:35:09.438Z" }, - { url = "https://files.pythonhosted.org/packages/93/86/5dd8076a926b9a95db3206aba20d89a7fc14dd5aac16e5c4de4b56033140/xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11", size = 414068, upload-time = "2025-10-02T14:35:11.162Z" }, - { url = "https://files.pythonhosted.org/packages/af/3c/0bb129170ee8f3650f08e993baee550a09593462a5cddd8e44d0011102b1/xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd", size = 191495, upload-time = "2025-10-02T14:35:12.971Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/3a/6797e0114c21d1725e2577508e24006fd7ff1d8c0c502d3b52e45c1771d8/xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799", size = 30620, upload-time = "2025-10-02T14:35:14.129Z" }, - { url = "https://files.pythonhosted.org/packages/86/15/9bc32671e9a38b413a76d24722a2bf8784a132c043063a8f5152d390b0f9/xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392", size = 31542, upload-time = "2025-10-02T14:35:15.21Z" }, - { url = "https://files.pythonhosted.org/packages/39/c5/cc01e4f6188656e56112d6a8e0dfe298a16934b8c47a247236549a3f7695/xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6", size = 27880, upload-time = "2025-10-02T14:35:16.315Z" }, - { url = "https://files.pythonhosted.org/packages/f3/30/25e5321c8732759e930c555176d37e24ab84365482d257c3b16362235212/xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702", size = 32956, upload-time = "2025-10-02T14:35:17.413Z" }, - { url = "https://files.pythonhosted.org/packages/9f/3c/0573299560d7d9f8ab1838f1efc021a280b5ae5ae2e849034ef3dee18810/xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db", size = 31072, upload-time = "2025-10-02T14:35:18.844Z" }, - { url = "https://files.pythonhosted.org/packages/7a/1c/52d83a06e417cd9d4137722693424885cc9878249beb3a7c829e74bf7ce9/xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54", size = 196409, upload-time = "2025-10-02T14:35:20.31Z" }, - { url = 
"https://files.pythonhosted.org/packages/e3/8e/c6d158d12a79bbd0b878f8355432075fc82759e356ab5a111463422a239b/xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f", size = 215736, upload-time = "2025-10-02T14:35:21.616Z" }, - { url = "https://files.pythonhosted.org/packages/bc/68/c4c80614716345d55071a396cf03d06e34b5f4917a467faf43083c995155/xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5", size = 214833, upload-time = "2025-10-02T14:35:23.32Z" }, - { url = "https://files.pythonhosted.org/packages/7e/e9/ae27c8ffec8b953efa84c7c4a6c6802c263d587b9fc0d6e7cea64e08c3af/xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1", size = 448348, upload-time = "2025-10-02T14:35:25.111Z" }, - { url = "https://files.pythonhosted.org/packages/d7/6b/33e21afb1b5b3f46b74b6bd1913639066af218d704cc0941404ca717fc57/xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee", size = 196070, upload-time = "2025-10-02T14:35:26.586Z" }, - { url = "https://files.pythonhosted.org/packages/96/b6/fcabd337bc5fa624e7203aa0fa7d0c49eed22f72e93229431752bddc83d9/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd", size = 212907, upload-time = "2025-10-02T14:35:28.087Z" }, - { url = "https://files.pythonhosted.org/packages/4b/d3/9ee6160e644d660fcf176c5825e61411c7f62648728f69c79ba237250143/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729", size = 200839, 
upload-time = "2025-10-02T14:35:29.857Z" }, - { url = "https://files.pythonhosted.org/packages/0d/98/e8de5baa5109394baf5118f5e72ab21a86387c4f89b0e77ef3e2f6b0327b/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292", size = 213304, upload-time = "2025-10-02T14:35:31.222Z" }, - { url = "https://files.pythonhosted.org/packages/7b/1d/71056535dec5c3177eeb53e38e3d367dd1d16e024e63b1cee208d572a033/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf", size = 416930, upload-time = "2025-10-02T14:35:32.517Z" }, - { url = "https://files.pythonhosted.org/packages/dc/6c/5cbde9de2cd967c322e651c65c543700b19e7ae3e0aae8ece3469bf9683d/xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033", size = 193787, upload-time = "2025-10-02T14:35:33.827Z" }, - { url = "https://files.pythonhosted.org/packages/19/fa/0172e350361d61febcea941b0cc541d6e6c8d65d153e85f850a7b256ff8a/xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec", size = 30916, upload-time = "2025-10-02T14:35:35.107Z" }, - { url = "https://files.pythonhosted.org/packages/ad/e6/e8cf858a2b19d6d45820f072eff1bea413910592ff17157cabc5f1227a16/xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8", size = 31799, upload-time = "2025-10-02T14:35:36.165Z" }, - { url = "https://files.pythonhosted.org/packages/56/15/064b197e855bfb7b343210e82490ae672f8bc7cdf3ddb02e92f64304ee8a/xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746", size = 28044, upload-time = "2025-10-02T14:35:37.195Z" }, - { url = 
"https://files.pythonhosted.org/packages/7e/5e/0138bc4484ea9b897864d59fce9be9086030825bc778b76cb5a33a906d37/xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e", size = 32754, upload-time = "2025-10-02T14:35:38.245Z" }, - { url = "https://files.pythonhosted.org/packages/18/d7/5dac2eb2ec75fd771957a13e5dda560efb2176d5203f39502a5fc571f899/xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405", size = 30846, upload-time = "2025-10-02T14:35:39.6Z" }, - { url = "https://files.pythonhosted.org/packages/fe/71/8bc5be2bb00deb5682e92e8da955ebe5fa982da13a69da5a40a4c8db12fb/xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3", size = 194343, upload-time = "2025-10-02T14:35:40.69Z" }, - { url = "https://files.pythonhosted.org/packages/e7/3b/52badfb2aecec2c377ddf1ae75f55db3ba2d321c5e164f14461c90837ef3/xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6", size = 213074, upload-time = "2025-10-02T14:35:42.29Z" }, - { url = "https://files.pythonhosted.org/packages/a2/2b/ae46b4e9b92e537fa30d03dbc19cdae57ed407e9c26d163895e968e3de85/xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063", size = 212388, upload-time = "2025-10-02T14:35:43.929Z" }, - { url = "https://files.pythonhosted.org/packages/f5/80/49f88d3afc724b4ac7fbd664c8452d6db51b49915be48c6982659e0e7942/xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7", size = 445614, upload-time = 
"2025-10-02T14:35:45.216Z" }, - { url = "https://files.pythonhosted.org/packages/ed/ba/603ce3961e339413543d8cd44f21f2c80e2a7c5cfe692a7b1f2cccf58f3c/xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b", size = 194024, upload-time = "2025-10-02T14:35:46.959Z" }, - { url = "https://files.pythonhosted.org/packages/78/d1/8e225ff7113bf81545cfdcd79eef124a7b7064a0bba53605ff39590b95c2/xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd", size = 210541, upload-time = "2025-10-02T14:35:48.301Z" }, - { url = "https://files.pythonhosted.org/packages/6f/58/0f89d149f0bad89def1a8dd38feb50ccdeb643d9797ec84707091d4cb494/xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0", size = 198305, upload-time = "2025-10-02T14:35:49.584Z" }, - { url = "https://files.pythonhosted.org/packages/11/38/5eab81580703c4df93feb5f32ff8fa7fe1e2c51c1f183ee4e48d4bb9d3d7/xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152", size = 210848, upload-time = "2025-10-02T14:35:50.877Z" }, - { url = "https://files.pythonhosted.org/packages/5e/6b/953dc4b05c3ce678abca756416e4c130d2382f877a9c30a20d08ee6a77c0/xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11", size = 414142, upload-time = "2025-10-02T14:35:52.15Z" }, - { url = "https://files.pythonhosted.org/packages/08/a9/238ec0d4e81a10eb5026d4a6972677cbc898ba6c8b9dbaec12ae001b1b35/xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5", size = 191547, upload-time = "2025-10-02T14:35:53.547Z" }, - { url = 
"https://files.pythonhosted.org/packages/f1/ee/3cf8589e06c2164ac77c3bf0aa127012801128f1feebf2a079272da5737c/xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f", size = 31214, upload-time = "2025-10-02T14:35:54.746Z" }, - { url = "https://files.pythonhosted.org/packages/02/5d/a19552fbc6ad4cb54ff953c3908bbc095f4a921bc569433d791f755186f1/xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad", size = 32290, upload-time = "2025-10-02T14:35:55.791Z" }, - { url = "https://files.pythonhosted.org/packages/b1/11/dafa0643bc30442c887b55baf8e73353a344ee89c1901b5a5c54a6c17d39/xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679", size = 28795, upload-time = "2025-10-02T14:35:57.162Z" }, - { url = "https://files.pythonhosted.org/packages/2c/db/0e99732ed7f64182aef4a6fb145e1a295558deec2a746265dcdec12d191e/xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4", size = 32955, upload-time = "2025-10-02T14:35:58.267Z" }, - { url = "https://files.pythonhosted.org/packages/55/f4/2a7c3c68e564a099becfa44bb3d398810cc0ff6749b0d3cb8ccb93f23c14/xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67", size = 31072, upload-time = "2025-10-02T14:35:59.382Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d9/72a29cddc7250e8a5819dad5d466facb5dc4c802ce120645630149127e73/xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad", size = 196579, upload-time = "2025-10-02T14:36:00.838Z" }, - { url = 
"https://files.pythonhosted.org/packages/63/93/b21590e1e381040e2ca305a884d89e1c345b347404f7780f07f2cdd47ef4/xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b", size = 215854, upload-time = "2025-10-02T14:36:02.207Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b8/edab8a7d4fa14e924b29be877d54155dcbd8b80be85ea00d2be3413a9ed4/xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b", size = 214965, upload-time = "2025-10-02T14:36:03.507Z" }, - { url = "https://files.pythonhosted.org/packages/27/67/dfa980ac7f0d509d54ea0d5a486d2bb4b80c3f1bb22b66e6a05d3efaf6c0/xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca", size = 448484, upload-time = "2025-10-02T14:36:04.828Z" }, - { url = "https://files.pythonhosted.org/packages/8c/63/8ffc2cc97e811c0ca5d00ab36604b3ea6f4254f20b7bc658ca825ce6c954/xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a", size = 196162, upload-time = "2025-10-02T14:36:06.182Z" }, - { url = "https://files.pythonhosted.org/packages/4b/77/07f0e7a3edd11a6097e990f6e5b815b6592459cb16dae990d967693e6ea9/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99", size = 213007, upload-time = "2025-10-02T14:36:07.733Z" }, - { url = "https://files.pythonhosted.org/packages/ae/d8/bc5fa0d152837117eb0bef6f83f956c509332ce133c91c63ce07ee7c4873/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3", size = 200956, 
upload-time = "2025-10-02T14:36:09.106Z" }, - { url = "https://files.pythonhosted.org/packages/26/a5/d749334130de9411783873e9b98ecc46688dad5db64ca6e04b02acc8b473/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6", size = 213401, upload-time = "2025-10-02T14:36:10.585Z" }, - { url = "https://files.pythonhosted.org/packages/89/72/abed959c956a4bfc72b58c0384bb7940663c678127538634d896b1195c10/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93", size = 417083, upload-time = "2025-10-02T14:36:12.276Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b3/62fd2b586283b7d7d665fb98e266decadf31f058f1cf6c478741f68af0cb/xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518", size = 193913, upload-time = "2025-10-02T14:36:14.025Z" }, - { url = "https://files.pythonhosted.org/packages/9a/9a/c19c42c5b3f5a4aad748a6d5b4f23df3bed7ee5445accc65a0fb3ff03953/xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119", size = 31586, upload-time = "2025-10-02T14:36:15.603Z" }, - { url = "https://files.pythonhosted.org/packages/03/d6/4cc450345be9924fd5dc8c590ceda1db5b43a0a889587b0ae81a95511360/xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f", size = 32526, upload-time = "2025-10-02T14:36:16.708Z" }, - { url = "https://files.pythonhosted.org/packages/0f/c9/7243eb3f9eaabd1a88a5a5acadf06df2d83b100c62684b7425c6a11bcaa8/xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95", size = 28898, upload-time = "2025-10-02T14:36:17.843Z" }, - { url = 
"https://files.pythonhosted.org/packages/93/1e/8aec23647a34a249f62e2398c42955acd9b4c6ed5cf08cbea94dc46f78d2/xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0", size = 30662, upload-time = "2025-10-02T14:37:01.743Z" }, - { url = "https://files.pythonhosted.org/packages/b8/0b/b14510b38ba91caf43006209db846a696ceea6a847a0c9ba0a5b1adc53d6/xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296", size = 41056, upload-time = "2025-10-02T14:37:02.879Z" }, - { url = "https://files.pythonhosted.org/packages/50/55/15a7b8a56590e66ccd374bbfa3f9ffc45b810886c8c3b614e3f90bd2367c/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13", size = 36251, upload-time = "2025-10-02T14:37:04.44Z" }, - { url = "https://files.pythonhosted.org/packages/62/b2/5ac99a041a29e58e95f907876b04f7067a0242cb85b5f39e726153981503/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd", size = 32481, upload-time = "2025-10-02T14:37:05.869Z" }, - { url = "https://files.pythonhosted.org/packages/7b/d9/8d95e906764a386a3d3b596f3c68bb63687dfca806373509f51ce8eea81f/xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d", size = 31565, upload-time = "2025-10-02T14:37:06.966Z" }, -] - [[package]] name = "yarl" version = "1.22.0" @@ -6117,21 +4667,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", 
size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, ] -[[package]] -name = "yc-bench" -version = "0.1.0" -source = { git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c#bfb0c88062450f46341bd9a5298903fc2e952a5c" } -dependencies = [ - { name = "litellm", marker = "python_full_version >= '3.12'" }, - { name = "matplotlib", marker = "python_full_version >= '3.12'" }, - { name = "plotly", marker = "python_full_version >= '3.12'" }, - { name = "pydantic", marker = "python_full_version >= '3.12'" }, - { name = "python-dotenv", marker = "python_full_version >= '3.12'" }, - { name = "sqlalchemy", marker = "python_full_version >= '3.12'" }, - { name = "streamlit", marker = "python_full_version >= '3.12'" }, - { name = "typer", marker = "python_full_version >= '3.12'" }, -] - [[package]] name = "youtube-transcript-api" version = "1.2.4" diff --git a/web/package.json b/web/package.json index e1df1e132056..50456076b643 100644 --- a/web/package.json +++ b/web/package.json @@ -4,7 +4,7 @@ "version": "0.0.0", "type": "module", "scripts": { - "sync-assets": "rm -rf public/fonts public/ds-assets && cp -r node_modules/@nous-research/ui/dist/fonts public/fonts && cp -r node_modules/@nous-research/ui/dist/assets public/ds-assets", + "sync-assets": "node scripts/sync-assets.mjs", "predev": "npm run sync-assets", "prebuild": "npm run sync-assets", "dev": "vite", diff --git a/web/scripts/sync-assets.mjs b/web/scripts/sync-assets.mjs new file mode 100644 index 000000000000..19b0bafb6aab --- /dev/null +++ b/web/scripts/sync-assets.mjs @@ -0,0 +1,27 @@ +#!/usr/bin/env node +// Cross-platform replacement for the previous shell pipeline: +// +// rm -rf public/fonts public/ds-assets +// && cp -r node_modules/@nous-research/ui/dist/fonts public/fonts +// && cp -r node_modules/@nous-research/ui/dist/assets public/ds-assets +// +// `rm -rf` / `cp -r` don't exist on Windows cmd.exe, so `npm run build` +// (invoked from Python via subprocess → 
cmd.exe) failed before Vite ran. +// Using Node's stdlib fs keeps this dependency-free and platform-neutral. + +import { cpSync, rmSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +const webRoot = resolve(dirname(fileURLToPath(import.meta.url)), ".."); +const uiDist = resolve(webRoot, "node_modules", "@nous-research", "ui", "dist"); + +const targets = [ + { from: resolve(uiDist, "fonts"), to: resolve(webRoot, "public", "fonts") }, + { from: resolve(uiDist, "assets"), to: resolve(webRoot, "public", "ds-assets") }, +]; + +for (const { from, to } of targets) { + rmSync(to, { recursive: true, force: true }); + cpSync(from, to, { recursive: true }); +} diff --git a/website/docs/developer-guide/acp-internals.md b/website/docs/developer-guide/acp-internals.md index 2ef552e266c3..89ae398b6af5 100644 --- a/website/docs/developer-guide/acp-internals.md +++ b/website/docs/developer-guide/acp-internals.md @@ -24,12 +24,15 @@ Key implementation files: ```text hermes acp / hermes-acp / python -m acp_adapter -> acp_adapter.entry.main() + -> parse --version / --check / --setup before server startup -> load ~/.hermes/.env -> configure stderr logging -> construct HermesACPAgent -> acp.run_agent(agent, use_unstable_protocol=True) ``` +The Zed ACP Registry path launches the same adapter through `uvx --from 'hermes-agent[acp]==' hermes-acp`, pointed at the `hermes-agent` PyPI release. + Stdout is reserved for ACP JSON-RPC transport. Human-readable logs go to stderr. ## Major components @@ -146,7 +149,7 @@ Instead it reuses Hermes' runtime resolver: - `acp_adapter/auth.py` - `hermes_cli/runtime_provider.py` -So ACP advertises and uses the currently configured Hermes provider/credentials. +So ACP advertises and uses the currently configured Hermes provider/credentials. 
It also always advertises a terminal setup auth method (`hermes-setup`, args `--setup`) so first-run registry clients can open Hermes' interactive model/provider configuration before starting a normal ACP session. ## Working directory binding diff --git a/website/docs/developer-guide/architecture.md b/website/docs/developer-guide/architecture.md index af2b0a2fd4b5..b5e2add8993e 100644 --- a/website/docs/developer-guide/architecture.md +++ b/website/docs/developer-guide/architecture.md @@ -127,7 +127,6 @@ hermes-agent/ ├── cron/ # Scheduler (jobs.py, scheduler.py) ├── plugins/memory/ # Memory provider plugins ├── plugins/context_engine/ # Context engine plugins -├── environments/ # RL training environments (Atropos) ├── skills/ # Bundled skills (always available) ├── optional-skills/ # Official optional skills (install explicitly) ├── website/ # Docusaurus documentation site @@ -185,7 +184,6 @@ If you are new to the codebase: 8. **[Gateway Internals](./gateway-internals.md)** — messaging platform gateway 9. **[Context Compression & Prompt Caching](./context-compression-and-caching.md)** — compression and caching 10. **[ACP Internals](./acp-internals.md)** — IDE integration -11. **[Environments, Benchmarks & Data Generation](./environments.md)** — RL training ## Major Subsystems @@ -247,11 +245,11 @@ Exposes Hermes as an editor-native agent over stdio/JSON-RPC for VS Code, Zed, a → [ACP Internals](./acp-internals.md) -### RL / Environments / Trajectories +### Trajectories -Full environment framework for evaluation and RL training. Integrates with Atropos, supports multiple tool-call parsers, and generates ShareGPT-format trajectories. +Generates ShareGPT-format trajectories from agent sessions for training data generation. 
-→ [Environments, Benchmarks & Data Generation](./environments.md), [Trajectories & Training Format](./trajectory-format.md) +→ [Trajectories & Training Format](./trajectory-format.md) ## Design Principles diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md index 6e00e367330b..b3bf9799d714 100644 --- a/website/docs/developer-guide/contributing.md +++ b/website/docs/developer-guide/contributing.md @@ -50,9 +50,6 @@ export VIRTUAL_ENV="$(pwd)/venv" # Install with all extras (messaging, cron, CLI menus, dev tools) uv pip install -e ".[all,dev]" -# tinker-atropos is a git submodule — needs `git submodule update --init` first -# if you didn't clone with `--recurse-submodules` -uv pip install -e "./tinker-atropos" # Optional: browser tools npm install diff --git a/website/docs/developer-guide/creating-skills.md b/website/docs/developer-guide/creating-skills.md index 43f088a9a351..7496c661d48d 100644 --- a/website/docs/developer-guide/creating-skills.md +++ b/website/docs/developer-guide/creating-skills.md @@ -360,7 +360,7 @@ All hub-installed skills go through a security scanner that checks for: Trust levels: - `builtin` — ships with Hermes (always trusted) - `official` — from `optional-skills/` in the repo (builtin trust, no third-party warning) -- `trusted` — from openai/skills, anthropics/skills +- `trusted` — from openai/skills, anthropics/skills, huggingface/skills - `community` — non-dangerous findings can be overridden with `--force`; `dangerous` verdicts remain blocked Hermes can now consume third-party skills from multiple external discovery models: diff --git a/website/docs/developer-guide/environments.md b/website/docs/developer-guide/environments.md deleted file mode 100644 index 0a5aa00ffff4..000000000000 --- a/website/docs/developer-guide/environments.md +++ /dev/null @@ -1,520 +0,0 @@ ---- -sidebar_position: 5 -title: "Environments, Benchmarks & Data Generation" -description: "Building RL 
training environments, running evaluation benchmarks, and generating SFT data with the Hermes-Agent Atropos integration" ---- - -# Environments, Benchmarks & Data Generation - -Hermes Agent includes a full environment framework that connects its tool-calling capabilities to the [Atropos](https://github.com/NousResearch/atropos) RL training framework. This enables three workflows: - -1. **RL Training** โ€” Train language models on multi-turn agentic tasks with GRPO -2. **Benchmarks** โ€” Evaluate models on standardised agentic benchmarks -3. **Data Generation** โ€” Generate SFT training data from agent rollouts - -All three share the same core: an **environment** class that defines tasks, runs an agent loop, and scores the output. - -:::info Repo environments vs RL training tools -The Python environment framework documented here lives under the repo's `environments/` directory and is the implementation-level API for Hermes/Atropos integration. This is separate from the user-facing `rl_*` tools, which operate as an orchestration surface for remote RL training workflows. 
-::: - -:::tip Quick Links -- **Want to run benchmarks?** Jump to [Available Benchmarks](#available-benchmarks) -- **Want to train with RL?** See [RL Training Tools](/user-guide/features/rl-training) for the agent-driven interface, or [Running Environments](#running-environments) for manual execution -- **Want to create a new environment?** See [Creating Environments](#creating-environments) -::: - -## Architecture - -The environment system is built on a three-layer inheritance chain: - -```mermaid -classDiagram - class BaseEnv { - Server management - Worker scheduling - Wandb logging - CLI: serve / process / evaluate - } - - class HermesAgentBaseEnv { - Terminal backend configuration - Tool resolution - Agent loop engine - ToolContext access - } - - class TerminalTestEnv { - Stack testing - } - - class HermesSweEnv { - SWE training - } - - class TerminalBench2EvalEnv { - Benchmark evaluation - } - - class TBLiteEvalEnv { - Fast benchmark - } - - class YCBenchEvalEnv { - Long-horizon benchmark - } - - BaseEnv <|-- HermesAgentBaseEnv - HermesAgentBaseEnv <|-- TerminalTestEnv - HermesAgentBaseEnv <|-- HermesSweEnv - HermesAgentBaseEnv <|-- TerminalBench2EvalEnv - TerminalBench2EvalEnv <|-- TBLiteEvalEnv - TerminalBench2EvalEnv <|-- YCBenchEvalEnv -``` - -### BaseEnv (Atropos) - -The foundation from `atroposlib`. Provides: -- **Server management** โ€” connects to OpenAI-compatible APIs (VLLM, SGLang, OpenRouter) -- **Worker scheduling** โ€” parallel rollout coordination -- **Wandb integration** โ€” metrics logging and rollout visualisation -- **CLI interface** โ€” three subcommands: `serve`, `process`, `evaluate` -- **Eval logging** โ€” `evaluate_log()` saves results to JSON + JSONL - -### HermesAgentBaseEnv - -The hermes-agent layer (`environments/hermes_base_env.py`). 
Adds: -- **Terminal backend configuration** โ€” sets `TERMINAL_ENV` for sandboxed execution (local, Docker, Modal, Daytona, SSH, Singularity) -- **Tool resolution** โ€” `_resolve_tools_for_group()` calls hermes-agent's `get_tool_definitions()` to get the right tool schemas based on enabled/disabled toolsets -- **Agent loop integration** โ€” `collect_trajectory()` runs `HermesAgentLoop` and scores the result -- **Two-phase operation** โ€” Phase 1 (OpenAI server) for eval/SFT, Phase 2 (VLLM ManagedServer) for full RL with logprobs -- **Async safety patches** โ€” monkey-patches Modal backend to work inside Atropos's event loop - -### Concrete Environments - -Your environment inherits from `HermesAgentBaseEnv` and implements five methods: - -| Method | Purpose | -|--------|---------| -| `setup()` | Load dataset, initialise state | -| `get_next_item()` | Return the next item for rollout | -| `format_prompt(item)` | Convert an item into the user message | -| `compute_reward(item, result, ctx)` | Score the rollout (0.0โ€“1.0) | -| `evaluate()` | Periodic evaluation logic | - -## Core Components - -### Agent Loop - -`HermesAgentLoop` (`environments/agent_loop.py`) is the reusable multi-turn agent engine. It runs the same tool-calling pattern as hermes-agent's main loop: - -1. Send messages + tool schemas to the API via `server.chat_completion()` -2. If the response contains `tool_calls`, dispatch each via `handle_function_call()` -3. Append tool results to the conversation, go back to step 1 -4. If no `tool_calls`, the agent is done - -Tool calls execute in a thread pool (`ThreadPoolExecutor(128)`) so that async backends (Modal, Docker) don't deadlock inside Atropos's event loop. 
- -Returns an `AgentResult`: - -```python -@dataclass -class AgentResult: - messages: List[Dict[str, Any]] # Full conversation history - turns_used: int # Number of LLM calls made - finished_naturally: bool # True if model stopped on its own - reasoning_per_turn: List[Optional[str]] # Extracted reasoning content - tool_errors: List[ToolError] # Errors encountered during tool dispatch - managed_state: Optional[Dict] # VLLM ManagedServer state (Phase 2) -``` - -### Tool Context - -`ToolContext` (`environments/tool_context.py`) gives reward functions direct access to the **same sandbox** the model used during its rollout. The `task_id` scoping means all state (files, processes, browser tabs) is preserved. - -```python -async def compute_reward(self, item, result, ctx: ToolContext): - # Run tests in the model's terminal sandbox - test = ctx.terminal("pytest -v") - if test["exit_code"] == 0: - return 1.0 - - # Check if a file was created - content = ctx.read_file("/workspace/solution.py") - if content.get("content"): - return 0.5 - - # Download files for local verification - ctx.download_file("/remote/output.bin", "/local/output.bin") - return 0.0 -``` - -Available methods: - -| Category | Methods | -|----------|---------| -| **Terminal** | `terminal(command, timeout)` | -| **Files** | `read_file(path)`, `write_file(path, content)`, `search(query, path)` | -| **Transfers** | `upload_file()`, `upload_dir()`, `download_file()`, `download_dir()` | -| **Web** | `web_search(query)`, `web_extract(urls)` | -| **Browser** | `browser_navigate(url)`, `browser_snapshot()` | -| **Generic** | `call_tool(name, args)` โ€” escape hatch for any hermes-agent tool | -| **Cleanup** | `cleanup()` โ€” release all resources | - -### Tool Call Parsers - -For **Phase 2** (VLLM ManagedServer), the server returns raw text without structured tool calls. 
Client-side parsers in `environments/tool_call_parsers/` extract `tool_calls` from raw output: - -```python -from environments.tool_call_parsers import get_parser - -parser = get_parser("hermes") # or "mistral", "llama3_json", "qwen", "deepseek_v3", etc. -content, tool_calls = parser.parse(raw_model_output) -``` - -Available parsers: `hermes`, `mistral`, `llama3_json`, `llama4_json`, `qwen`, `qwen3_coder`, `deepseek_v3`, `deepseek_v3_1` (alias `deepseek_v31`), `kimi_k2`, `longcat`, `glm45`, `glm47`. - -In Phase 1 (OpenAI server type), parsers are not needed โ€” the server handles tool call parsing natively. - -## Available Benchmarks - -### TerminalBench2 - -**89 challenging terminal tasks** with per-task Docker sandbox environments. - -| | | -|---|---| -| **What it tests** | Single-task coding/sysadmin ability | -| **Scoring** | Binary pass/fail (test suite verification) | -| **Sandbox** | Modal cloud sandboxes (per-task Docker images) | -| **Tools** | `terminal` + `file` | -| **Tasks** | 89 tasks across multiple categories | -| **Cost** | ~$50โ€“200 for full eval (parallel execution) | -| **Time** | ~2โ€“4 hours | - -```bash -python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ - --config environments/benchmarks/terminalbench_2/default.yaml - -# Run specific tasks -python environments/benchmarks/terminalbench_2/terminalbench2_env.py evaluate \ - --config environments/benchmarks/terminalbench_2/default.yaml \ - --env.task_filter fix-git,git-multibranch -``` - -Dataset: [NousResearch/terminal-bench-2](https://huggingface.co/datasets/NousResearch/terminal-bench-2) on HuggingFace. - -### TBLite (OpenThoughts Terminal Bench Lite) - -**100 difficulty-calibrated tasks** โ€” a faster proxy for TerminalBench2. 
- -| | | -|---|---| -| **What it tests** | Same as TB2 (coding/sysadmin), calibrated difficulty tiers | -| **Scoring** | Binary pass/fail | -| **Sandbox** | Modal cloud sandboxes | -| **Tools** | `terminal` + `file` | -| **Tasks** | 100 tasks: Easy (40), Medium (26), Hard (26), Extreme (8) | -| **Correlation** | r=0.911 with full TB2 | -| **Speed** | 2.6โ€“8ร— faster than TB2 | - -```bash -python environments/benchmarks/tblite/tblite_env.py evaluate \ - --config environments/benchmarks/tblite/default.yaml -``` - -TBLite is a thin subclass of TerminalBench2 โ€” only the dataset and timeouts differ. Created by the OpenThoughts Agent team (Snorkel AI + Bespoke Labs). Dataset: [NousResearch/openthoughts-tblite](https://huggingface.co/datasets/NousResearch/openthoughts-tblite). - -### YC-Bench - -**Long-horizon strategic benchmark** โ€” the agent plays CEO of an AI startup. - -| | | -|---|---| -| **What it tests** | Multi-turn strategic coherence over hundreds of turns | -| **Scoring** | Composite: `0.5 ร— survival + 0.5 ร— normalised_funds` | -| **Sandbox** | Local terminal (no Modal needed) | -| **Tools** | `terminal` only | -| **Runs** | 9 default (3 presets ร— 3 seeds), sequential | -| **Cost** | ~$50โ€“200 for full eval | -| **Time** | ~3โ€“6 hours | - -```bash -# Install yc-bench (optional dependency) -pip install "hermes-agent[yc-bench]" - -# Run evaluation -bash environments/benchmarks/yc_bench/run_eval.sh - -# Or directly -python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ - --config environments/benchmarks/yc_bench/default.yaml - -# Quick single-preset test -python environments/benchmarks/yc_bench/yc_bench_env.py evaluate \ - --config environments/benchmarks/yc_bench/default.yaml \ - --env.presets '["fast_test"]' --env.seeds '[1]' -``` - -YC-Bench uses [collinear-ai/yc-bench](https://github.com/collinear-ai/yc-bench) โ€” a deterministic simulation with 4 skill domains (research, inference, data_environment, training), prestige system, employee 
management, and financial pressure. Unlike TB2's per-task binary scoring, YC-Bench measures whether an agent can maintain coherent strategy over hundreds of compounding decisions. - -## Training Environments - -### TerminalTestEnv - -A minimal self-contained environment with inline tasks (no external dataset). Used for **validating the full stack** end-to-end. Each task asks the model to create a file at a known path; the verifier checks the content. - -```bash -# Process mode (saves rollouts to JSONL, no training server needed) -python environments/terminal_test_env/terminal_test_env.py process \ - --env.data_path_to_save_groups terminal_test_output.jsonl - -# Serve mode (connects to Atropos API for RL training) -python environments/terminal_test_env/terminal_test_env.py serve -``` - -### HermesSweEnv - -SWE-bench style training environment. The model gets a coding task, uses terminal + file + web tools to solve it, and the reward function runs tests in the same Modal sandbox. - -```bash -python environments/hermes_swe_env/hermes_swe_env.py serve \ - --openai.model_name YourModel \ - --env.dataset_name bigcode/humanevalpack \ - --env.terminal_backend modal -``` - -## Running Environments - -Every environment is a standalone Python script with three CLI subcommands: - -### `evaluate` โ€” Run a benchmark - -For eval-only environments (benchmarks). Runs all items, computes metrics, logs to wandb. - -```bash -python environments/benchmarks/tblite/tblite_env.py evaluate \ - --config environments/benchmarks/tblite/default.yaml \ - --openai.model_name anthropic/claude-sonnet-4.6 -``` - -No training server or `run-api` needed. The environment handles everything. - -### `process` โ€” Generate SFT data - -Runs rollouts and saves scored trajectories to JSONL. Useful for generating training data without a full RL loop. 
- -```bash -python environments/terminal_test_env/terminal_test_env.py process \ - --env.data_path_to_save_groups output.jsonl \ - --openai.model_name anthropic/claude-sonnet-4.6 -``` - -Output format: each line is a scored trajectory with the full conversation history, reward, and metadata. - -### `serve` โ€” Connect to Atropos for RL training - -Connects the environment to a running Atropos API server (`run-api`). Used during live RL training. - -```bash -# Terminal 1: Start the Atropos API -run-api - -# Terminal 2: Start the environment -python environments/hermes_swe_env/hermes_swe_env.py serve \ - --openai.model_name YourModel -``` - -The environment receives items from Atropos, runs agent rollouts, computes rewards, and sends scored trajectories back for training. - -## Two-Phase Operation - -### Phase 1: OpenAI Server (Eval / SFT) - -Uses `server.chat_completion()` with `tools=` parameter. The server (VLLM, SGLang, OpenRouter, OpenAI) handles tool call parsing natively. Returns `ChatCompletion` objects with structured `tool_calls`. - -- **Use for**: evaluation, SFT data generation, benchmarks, testing -- **Placeholder tokens** are created for the Atropos pipeline (since real token IDs aren't available from the OpenAI API) - -### Phase 2: VLLM ManagedServer (Full RL) - -Uses ManagedServer for exact token IDs + logprobs via `/generate`. A client-side [tool call parser](#tool-call-parsers) reconstructs structured `tool_calls` from raw output. 
- -- **Use for**: full RL training with GRPO/PPO -- **Real tokens**, masks, and logprobs flow through the pipeline -- Set `tool_call_parser` in config to match your model's format (e.g., `"hermes"`, `"qwen"`, `"mistral"`) - -## Creating Environments - -### Training Environment - -```python -from environments.hermes_base_env import HermesAgentBaseEnv, HermesAgentEnvConfig -from atroposlib.envs.server_handling.server_manager import APIServerConfig - -class MyEnvConfig(HermesAgentEnvConfig): - my_custom_field: str = "default_value" - -class MyEnv(HermesAgentBaseEnv): - name = "my-env" - env_config_cls = MyEnvConfig - - @classmethod - def config_init(cls): - env_config = MyEnvConfig( - enabled_toolsets=["terminal", "file"], - terminal_backend="modal", - max_agent_turns=30, - ) - server_configs = [APIServerConfig( - base_url="https://openrouter.ai/api/v1", - model_name="anthropic/claude-sonnet-4.6", - server_type="openai", - )] - return env_config, server_configs - - async def setup(self): - from datasets import load_dataset - self.dataset = list(load_dataset("my-dataset", split="train")) - self.iter = 0 - - async def get_next_item(self): - item = self.dataset[self.iter % len(self.dataset)] - self.iter += 1 - return item - - def format_prompt(self, item): - return item["instruction"] - - async def compute_reward(self, item, result, ctx): - # ctx gives full tool access to the rollout's sandbox - test = ctx.terminal("pytest -v") - return 1.0 if test["exit_code"] == 0 else 0.0 - - async def evaluate(self, *args, **kwargs): - # Periodic evaluation during training - pass - -if __name__ == "__main__": - MyEnv.cli() -``` - -### Eval-Only Benchmark - -For benchmarks, follow the pattern used by TerminalBench2, TBLite, and YC-Bench: - -1. **Create under** `environments/benchmarks/your-benchmark/` -2. **Set eval-only config**: `eval_handling=STOP_TRAIN`, `steps_per_eval=1`, `total_steps=1` -3. 
**Stub training methods**: `collect_trajectories()` returns `(None, [])`, `score()` returns `None` -4. **Implement** `rollout_and_score_eval(eval_item)` โ€” the per-item agent loop + scoring -5. **Implement** `evaluate()` โ€” orchestrates all runs, computes aggregate metrics -6. **Add streaming JSONL** for crash-safe result persistence -7. **Add cleanup**: `KeyboardInterrupt` handling, `cleanup_all_environments()`, `_tool_executor.shutdown()` -8. **Run with** `evaluate` subcommand - -See `environments/benchmarks/yc_bench/yc_bench_env.py` for a clean, well-documented reference implementation. - -## Configuration Reference - -### HermesAgentEnvConfig Fields - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `enabled_toolsets` | `List[str]` | `None` (all) | Which hermes toolsets to enable | -| `disabled_toolsets` | `List[str]` | `None` | Toolsets to filter out | -| `distribution` | `str` | `None` | Probabilistic toolset distribution name | -| `max_agent_turns` | `int` | `30` | Max LLM calls per rollout | -| `agent_temperature` | `float` | `1.0` | Sampling temperature | -| `system_prompt` | `str` | `None` | System message for the agent | -| `terminal_backend` | `str` | `"local"` | `local`, `docker`, `modal`, `daytona`, `ssh`, `singularity` | -| `terminal_timeout` | `int` | `120` | Seconds per terminal command | -| `terminal_lifetime` | `int` | `3600` | Max sandbox lifetime | -| `dataset_name` | `str` | `None` | HuggingFace dataset identifier | -| `tool_pool_size` | `int` | `128` | Thread pool size for tool execution | -| `tool_call_parser` | `str` | `"hermes"` | Parser for Phase 2 raw output | -| `extra_body` | `Dict` | `None` | Extra params for OpenAI API (e.g., OpenRouter provider prefs) | -| `eval_handling` | `Enum` | `STOP_TRAIN` | `STOP_TRAIN`, `LIMIT_TRAIN`, `NONE` | - -### YAML Configuration - -Environments can be configured via YAML files passed with `--config`: - -```yaml -env: - enabled_toolsets: ["terminal", "file"] - 
max_agent_turns: 60 - max_token_length: 32000 - agent_temperature: 0.8 - terminal_backend: "modal" - terminal_timeout: 300 - dataset_name: "NousResearch/terminal-bench-2" - tokenizer_name: "NousResearch/Hermes-3-Llama-3.1-8B" - use_wandb: true - wandb_name: "my-benchmark" - -openai: - base_url: "https://openrouter.ai/api/v1" - model_name: "anthropic/claude-sonnet-4.6" - server_type: "openai" - health_check: false -``` - -YAML values override `config_init()` defaults. CLI arguments override YAML values: - -```bash -python my_env.py evaluate \ - --config my_config.yaml \ - --openai.model_name anthropic/claude-opus-4.6 # overrides YAML -``` - -## Prerequisites - -### For all environments - -- Python >= 3.11 -- `atroposlib`: `pip install git+https://github.com/NousResearch/atropos.git` -- An LLM API key (OpenRouter, OpenAI, or self-hosted VLLM/SGLang) - -### For Modal-sandboxed benchmarks (TB2, TBLite) - -- [Modal](https://modal.com) account and CLI: `pip install "hermes-agent[modal]"` -- `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` environment variables - -### For YC-Bench - -- `pip install "hermes-agent[yc-bench]"` (installs the yc-bench CLI + SQLAlchemy) -- No Modal needed โ€” runs with local terminal backend - -### For RL training - -- `TINKER_API_KEY` โ€” API key for the [Tinker](https://tinker.computer) training service -- `WANDB_API_KEY` โ€” for Weights & Biases metrics tracking -- The `tinker-atropos` submodule (at `tinker-atropos/` in the repo) - -See [RL Training](/user-guide/features/rl-training) for the agent-driven RL workflow. 
- -## Directory Structure - -``` -environments/ -โ”œโ”€โ”€ hermes_base_env.py # Abstract base class (HermesAgentBaseEnv) -โ”œโ”€โ”€ agent_loop.py # Multi-turn agent engine (HermesAgentLoop) -โ”œโ”€โ”€ tool_context.py # Per-rollout tool access for reward functions -โ”œโ”€โ”€ patches.py # Async-safety patches for Modal backend -โ”‚ -โ”œโ”€โ”€ tool_call_parsers/ # Phase 2 client-side parsers -โ”‚ โ”œโ”€โ”€ hermes_parser.py # Hermes/ChatML format -โ”‚ โ”œโ”€โ”€ mistral_parser.py # Mistral [TOOL_CALLS] format -โ”‚ โ”œโ”€โ”€ llama_parser.py # Llama 3 JSON tool calling -โ”‚ โ”œโ”€โ”€ qwen_parser.py # Qwen format -โ”‚ โ”œโ”€โ”€ deepseek_v3_parser.py # DeepSeek V3 format -โ”‚ โ””โ”€โ”€ ... # + kimi_k2, longcat, glm45/47, etc. -โ”‚ -โ”œโ”€โ”€ terminal_test_env/ # Stack validation (inline tasks) -โ”œโ”€โ”€ hermes_swe_env/ # SWE-bench training environment -โ”‚ -โ””โ”€โ”€ benchmarks/ # Evaluation benchmarks - โ”œโ”€โ”€ terminalbench_2/ # 89 terminal tasks, Modal sandboxes - โ”œโ”€โ”€ tblite/ # 100 calibrated tasks (fast TB2 proxy) - โ””โ”€โ”€ yc_bench/ # Long-horizon strategic benchmark -``` diff --git a/website/docs/getting-started/updating.md b/website/docs/getting-started/updating.md index 55df5a7f6406..aa2a426db998 100644 --- a/website/docs/getting-started/updating.md +++ b/website/docs/getting-started/updating.md @@ -123,13 +123,11 @@ If you installed manually (not via the quick installer): cd /path/to/hermes-agent export VIRTUAL_ENV="$(pwd)/venv" -# Pull latest code and submodules +# Pull latest code git pull origin main -git submodule update --init --recursive # Reinstall (picks up new dependencies) uv pip install -e ".[all]" -uv pip install -e "./tinker-atropos" # Check for new config options hermes config check diff --git a/website/docs/integrations/index.md b/website/docs/integrations/index.md index 21235a12ba18..d80a61abd8c4 100644 --- a/website/docs/integrations/index.md +++ b/website/docs/integrations/index.md @@ -97,5 +97,4 @@ See the [Messaging Gateway 
overview](/docs/user-guide/messaging) for the platfor ## Training & Evaluation -- **[RL Training](/docs/user-guide/features/rl-training)** — Generate trajectory data from agent sessions for reinforcement learning and model fine-tuning. Supports Atropos environments with customizable reward functions. - **[Batch Processing](/docs/user-guide/features/batch-processing)** — Run the agent across hundreds of prompts in parallel, generating structured ShareGPT-format trajectory data for training data generation or evaluation. diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index b53ab15ed843..af9e07814d7e 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -1355,7 +1355,6 @@ You can switch between providers at any time with `hermes model` — no restart | Premium TTS voices | [ElevenLabs](https://elevenlabs.io/) | `ELEVENLABS_API_KEY` | | OpenAI TTS + voice transcription | [OpenAI](https://platform.openai.com/api-keys) | `VOICE_TOOLS_OPENAI_KEY` | | Mistral TTS + voice transcription | [Mistral](https://console.mistral.ai/) | `MISTRAL_API_KEY` | -| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` | | Cross-session user modeling | [Honcho](https://honcho.dev/) | `HONCHO_API_KEY` | | Semantic long-term memory | [Supermemory](https://supermemory.ai) | `SUPERMEMORY_API_KEY` | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index eb2bc8162028..93107fba147e 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -135,6 +135,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `CAMOFOX_SESSION_KEY` | Optional Camofox session key used when creating tabs for `CAMOFOX_USER_ID` | | `CAMOFOX_ADOPT_EXISTING_TAB` | Set to `true` to reuse an existing 
Camofox tab before creating a new one | | `BROWSER_INACTIVITY_TIMEOUT` | Browser session inactivity timeout in seconds | +| `AGENT_BROWSER_ARGS` | Extra Chromium launch flags (comma- or newline-separated). Hermes auto-injects `--no-sandbox,--disable-dev-shm-usage` when running as root or on AppArmor-restricted unprivileged user namespaces (Ubuntu 23.10+, DGX Spark, many container images); set this manually only to override or add other flags. | | `FAL_KEY` | Image generation ([fal.ai](https://fal.ai/)) | | `GROQ_API_KEY` | Groq Whisper STT API key ([groq.com](https://groq.com/)) | | `ELEVENLABS_API_KEY` | ElevenLabs premium TTS voices ([elevenlabs.io](https://elevenlabs.io/)) | @@ -147,8 +148,6 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | `HONCHO_BASE_URL` | Base URL for self-hosted Honcho instances (default: Honcho cloud). No API key required for local instances | | `HINDSIGHT_TIMEOUT` | Timeout in seconds for Hindsight memory-provider API calls (default: `60`). Bump this if your Hindsight instance is slow to respond during `/sync` or `on_session_switch` and you're seeing timeouts in `errors.log`. 
| | `SUPERMEMORY_API_KEY` | Semantic long-term memory with profile recall and session ingest ([supermemory.ai](https://supermemory.ai)) | -| `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) | -| `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) | | `DAYTONA_API_KEY` | Daytona cloud sandboxes ([daytona.io](https://daytona.io/)) | | `VERCEL_TOKEN` | Vercel Sandbox access token ([vercel.com](https://vercel.com/)) | | `VERCEL_PROJECT_ID` | Vercel project ID (required with `VERCEL_TOKEN`) | diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 40f9c5539c8c..8c4c2f364322 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -120,7 +120,6 @@ hermes skills uninstall | [**faiss**](/docs/user-guide/skills/optional/mlops/mlops-faiss) | Facebook's library for efficient similarity search and clustering of dense vectors. Supports billions of vectors, GPU acceleration, and various index types (Flat, IVF, HNSW). Use for fast k-NN search, large-scale vector retrieval, or whe... | | [**optimizing-attention-flash**](/docs/user-guide/skills/optional/mlops/mlops-flash-attention) | Optimizes transformer attention with Flash Attention for 2-4x speedup and 10-20x memory reduction. Use when training/running transformers with long sequences (>512 tokens), encountering GPU memory issues with attention, or need faster in... 
| | [**guidance**](/docs/user-guide/skills/optional/mlops/mlops-guidance) | Control LLM output with regex and grammars, guarantee valid JSON/XML/code generation, enforce structured formats, and build multi-step workflows with Guidance - Microsoft Research's constrained generation framework | -| [**hermes-atropos-environments**](/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments) | Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/eva... | | [**huggingface-tokenizers**](/docs/user-guide/skills/optional/mlops/mlops-huggingface-tokenizers) | Fast tokenizers optimized for research and production. Rust-based implementation tokenizes 1GB in <20 seconds. Supports BPE, WordPiece, and Unigram algorithms. Train custom vocabularies, track alignments, handle padding/truncation. Integ... | | [**instructor**](/docs/user-guide/skills/optional/mlops/mlops-instructor) | Extract structured data from LLM responses with Pydantic validation, retry failed extractions automatically, parse complex JSON with type safety, and stream partial results with Instructor - battle-tested structured output library | | [**lambda-labs-gpu-cloud**](/docs/user-guide/skills/optional/mlops/mlops-lambda-labs) | Reserved and on-demand GPU cloud instances for ML training and inference. Use when you need dedicated GPU instances with simple SSH access, persistent filesystems, or high-performance multi-node clusters for large-scale training. 
| diff --git a/website/docs/reference/tools-reference.md b/website/docs/reference/tools-reference.md index 5d0100de79dd..03930264f8cb 100644 --- a/website/docs/reference/tools-reference.md +++ b/website/docs/reference/tools-reference.md @@ -148,21 +148,6 @@ Registered only when the agent is spawned by the kanban dispatcher (`HERMES_KANB |------|-------------|----------------------| | `mixture_of_agents` | Route a hard problem through multiple frontier LLMs collaboratively. Makes 5 API calls (4 reference models + 1 aggregator) with maximum reasoning effort โ€” use sparingly for genuinely difficult problems. Best for: complex math, advanced algโ€ฆ | OPENROUTER_API_KEY | -## `rl` toolset - -| Tool | Description | Requires environment | -|------|-------------|----------------------| -| `rl_check_status` | Get status and metrics for a training run. RATE LIMITED: enforces 30-minute minimum between checks for the same run. Returns WandB metrics: step, state, reward_mean, loss, percent_correct. | TINKER_API_KEY, WANDB_API_KEY | -| `rl_edit_config` | Update a configuration field. Use rl_get_current_config() first to see all available fields for the selected environment. Each environment has different configurable options. Infrastructure settings (tokenizer, URLs, lora_rank, learning_raโ€ฆ | TINKER_API_KEY, WANDB_API_KEY | -| `rl_get_current_config` | Get the current environment configuration. Returns only fields that can be modified: group_size, max_token_length, total_steps, steps_per_eval, use_wandb, wandb_name, max_num_workers. | TINKER_API_KEY, WANDB_API_KEY | -| `rl_get_results` | Get final results and metrics for a completed training run. Returns final metrics and path to trained weights. | TINKER_API_KEY, WANDB_API_KEY | -| `rl_list_environments` | List all available RL environments. Returns environment names, paths, and descriptions. TIP: Read the file_path with file tools to understand how each environment works (verifiers, data loading, rewards). 
| TINKER_API_KEY, WANDB_API_KEY | -| `rl_list_runs` | List all training runs (active and completed) with their status. | TINKER_API_KEY, WANDB_API_KEY | -| `rl_select_environment` | Select an RL environment for training. Loads the environment's default configuration. After selecting, use rl_get_current_config() to see settings and rl_edit_config() to modify them. | TINKER_API_KEY, WANDB_API_KEY | -| `rl_start_training` | Start a new RL training run with the current environment and config. Most training parameters (lora_rank, learning_rate, etc.) are fixed. Use rl_edit_config() to set group_size, batch_size, wandb_project before starting. WARNING: Trainingโ€ฆ | TINKER_API_KEY, WANDB_API_KEY | -| `rl_stop_training` | Stop a running training job. Use if metrics look bad, training is stagnant, or you want to try different settings. | TINKER_API_KEY, WANDB_API_KEY | -| `rl_test_inference` | Quick inference test for any environment. Runs a few steps of inference + scoring using OpenRouter. Default: 3 steps x 16 completions = 48 rollouts per model, testing 3 models = 144 total. Tests environment loading, prompt construction, inโ€ฆ | TINKER_API_KEY, WANDB_API_KEY | - ## `session_search` toolset | Tool | Description | Requires environment | diff --git a/website/docs/reference/toolsets-reference.md b/website/docs/reference/toolsets-reference.md index ce11d86cb416..5bf1f14260e9 100644 --- a/website/docs/reference/toolsets-reference.md +++ b/website/docs/reference/toolsets-reference.md @@ -45,7 +45,7 @@ Or in-session: ``` /tools list /tools disable browser -/tools enable rl +/tools enable homeassistant ``` ## Core Toolsets @@ -71,7 +71,6 @@ Or in-session: | `memory` | `memory` | Persistent cross-session memory management. | | `messaging` | `send_message` | Send messages to other platforms (Telegram, Discord, etc.) from within a session. | | `moa` | `mixture_of_agents` | Multi-model consensus via Mixture of Agents. 
| -| `rl` | `rl_check_status`, `rl_edit_config`, `rl_get_current_config`, `rl_get_results`, `rl_list_environments`, `rl_list_runs`, `rl_select_environment`, `rl_start_training`, `rl_stop_training`, `rl_test_inference` | RL training environment management (Atropos). | | `safe` | `image_generate`, `vision_analyze`, `web_extract`, `web_search` (via `includes`) | Read-only research + media generation. No file writes, no terminal, no code execution. | | `search` | `web_search` | Web search only (without extract). | | `session_search` | `session_search` | Search past conversation sessions. | diff --git a/website/docs/user-guide/features/acp.md b/website/docs/user-guide/features/acp.md index 1822f7adfad1..6540748c8890 100644 --- a/website/docs/user-guide/features/acp.md +++ b/website/docs/user-guide/features/acp.md @@ -45,6 +45,14 @@ This installs the `agent-client-protocol` dependency and enables: - `hermes-acp` - `python -m acp_adapter` +For Zed registry installs, Zed launches Hermes through the official ACP Registry entry. That entry uses a `uvx` distribution that runs: + +```bash +uvx --from 'hermes-agent[acp]==' hermes-acp +``` + +Make sure `uv` is available on `PATH` before using the registry install path. + ## Launching the ACP server Any of the following starts Hermes in ACP mode: @@ -63,6 +71,34 @@ python -m acp_adapter Hermes logs to stderr so stdout remains reserved for ACP JSON-RPC traffic. +For non-interactive checks: + +```bash +hermes acp --version +hermes acp --check +``` + +### Browser tools (optional) + +Browser tools (`browser_navigate`, `browser_click`, etc.) depend on the +`agent-browser` npm package and Chromium, which aren't part of the Python +wheel. Install them with: + +```bash +hermes acp --setup-browser # interactive (prompts before ~400 MB download) +hermes acp --setup-browser --yes # accept the download non-interactively +``` + +This is the standalone command. 
The Zed registry's terminal-auth flow (`hermes acp --setup`) also offers the browser bootstrap as a follow-up question after model selection, so most users never need to run `--setup-browser` directly. + +What it does: + +- Installs Node.js 22 LTS into `~/.hermes/node/` if missing +- `npm install -g agent-browser @askjo/camofox-browser` into that prefix (no sudo needed — `npm`'s `--prefix` points at the user-writable Hermes-managed Node) +- Installs Playwright Chromium, or uses a detected system Chrome/Chromium when available + +The bootstrap is idempotent — re-running it is fast and skips work that's already done. + ## Editor setup ### VS Code @@ -90,7 +126,19 @@ If you want to define Hermes manually, add it through VS Code settings under `ac ### Zed -Example settings snippet: +Zed v0.221.x and newer installs external agents through the official ACP Registry. + +1. Open the Agent Panel. +2. Click **Add Agent**, or run the `zed: acp registry` command. +3. Search for **Hermes Agent**. +4. Install it and start a new Hermes external-agent thread. + +Prerequisites: + +- Configure Hermes provider credentials first with `hermes model`, or set them in `~/.hermes/.env` / `~/.hermes/config.yaml`. +- Install `uv` so the registry launcher can run `uvx --from 'hermes-agent[acp]==<version>' hermes-acp`.
+ +For local development before the registry entry is available, use a custom agent server in Zed settings: ```json { @@ -98,9 +146,9 @@ Example settings snippet: "hermes-agent": { "type": "custom", "command": "hermes", - "args": ["acp"], - }, - }, + "args": ["acp"] + } + } } ``` @@ -114,18 +162,23 @@ Use an ACP-compatible plugin and point it at: ## Registry manifest -The ACP registry manifest lives at: +The source copy of Hermes' official ACP Registry metadata lives at: ```text acp_registry/agent.json +acp_registry/icon.svg ``` -It advertises a command-based agent whose launch command is: +The upstream registry PR copies those files into the top-level `hermes-agent/` directory in `agentclientprotocol/registry`. + +The registry entry uses a `uvx` distribution that points directly at the `hermes-agent` PyPI release: ```text -hermes acp +uvx --from 'hermes-agent[acp]==<version>' hermes-acp ``` + +The registry CI verifies that the pinned version exists on PyPI, so the manifest's `version` and uvx `package` pin must always match `pyproject.toml`. `scripts/release.py` keeps them in lockstep automatically. + ## Configuration and credentials ACP mode uses the same Hermes configuration as the CLI: @@ -135,7 +188,7 @@ ACP mode uses the same Hermes configuration as the CLI: - `~/.hermes/skills/` - `~/.hermes/state.db` -Provider resolution uses Hermes' normal runtime resolver, so ACP inherits the currently configured provider and credentials. +Provider resolution uses Hermes' normal runtime resolver, so ACP inherits the currently configured provider and credentials. Hermes also advertises a terminal auth method (`--setup`) for first-run registry clients; this opens Hermes' interactive model/provider setup. ## Session behavior @@ -171,29 +224,36 @@ On timeout or error, the approval bridge denies the request.
Check: -- the editor is pointed at the correct `acp_registry/` path -- Hermes is installed and on your PATH -- the ACP extra is installed (`pip install -e '.[acp]'`) +- In Zed, open the ACP Registry with `zed: acp registry` and search for **Hermes Agent**. +- For manual/local development, verify the custom `agent_servers` command points to `hermes acp`. +- Hermes is installed and on your PATH. +- The ACP extra is installed (`pip install -e '.[acp]'`). +- `uv` is installed if launching from the official Zed registry entry. ### ACP starts but immediately errors Try these checks: ```bash +hermes acp --version +hermes acp --check hermes doctor hermes status -hermes acp ``` ### Missing credentials -ACP mode does not have its own login flow. It uses Hermes' existing provider setup. Configure credentials with: +ACP mode uses Hermes' existing provider setup. Configure credentials with: ```bash hermes model ``` -or by editing `~/.hermes/.env`. +or by editing `~/.hermes/.env`. Registry clients can also trigger Hermes' terminal auth flow, which runs the same interactive provider/model setup. + +### Zed registry launcher cannot find uv + +Install `uv` from the official uv installation docs, then retry the Hermes Agent thread from Zed. ## See also diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index e27101a64725..1da4a8f2a36d 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -368,6 +368,13 @@ BROWSERBASE_SESSION_TIMEOUT=600000 # Inactivity timeout before auto-cleanup in seconds (default: 120) BROWSER_INACTIVITY_TIMEOUT=120 + +# Extra Chromium launch flags (comma- or newline-separated). Hermes auto-injects +# `--no-sandbox,--disable-dev-shm-usage` when it detects root or AppArmor-restricted +# unprivileged user namespaces (Ubuntu 23.10+, DGX Spark, many container images), +# so most users don't need to set this. 
Set it manually only if you need a flag +# Hermes doesn't add automatically; setting it disables the auto-injection. +AGENT_BROWSER_ARGS=--no-sandbox ``` ### Install agent-browser CLI diff --git a/website/docs/user-guide/features/cron.md b/website/docs/user-guide/features/cron.md index c2c67df8a2af..9a14e6dcd1ea 100644 --- a/website/docs/user-guide/features/cron.md +++ b/website/docs/user-guide/features/cron.md @@ -522,6 +522,86 @@ print(json.dumps({"wakeAgent": True, "context": {"new_issues": latest - prev}})) When `wakeAgent` is omitted, the default is `true` (wake the agent as usual). +#### Recipes: cheap pre-run gates + +The `wakeAgent` gate gives you a $0 way to decide whether a scheduled job should spend any LLM tokens at all. Three patterns cover most use cases. + +**File-change gate** — only run when a watched file has new content since the last successful tick. The scheduler records each job's `last_run_at`; the script below instead keeps the last-seen mtime in its own state file and compares it against the file's current mtime. + +```bash +#!/bin/bash +# ~/.hermes/scripts/feed-changed.sh +FEED="$HOME/data/feed.json" +STATE="$HOME/.hermes/scripts/.feed-changed.last" +test -f "$FEED" || { echo '{"wakeAgent": false}'; exit 0; } +mtime=$(stat -c %Y "$FEED") +last=$(cat "$STATE" 2>/dev/null || echo 0) +if [ "$mtime" -le "$last" ]; then + echo '{"wakeAgent": false}' +else + echo "$mtime" > "$STATE" + echo '{"wakeAgent": true}' +fi +``` + +```text +cronjob(action="create", name="process-feed", + schedule="every 30m", + script="feed-changed.sh", + prompt="A new ~/data/feed.json has landed. Summarize what changed.") +``` + +**External-flag gate** — only run when some other process has signalled readiness (e.g. a deploy hook drops a file, a CI job sets a value in your state store).
+ +```bash +#!/bin/bash +# ~/.hermes/scripts/flag-ready.sh +if test -f /tmp/new-data-ready; then + rm -f /tmp/new-data-ready + echo '{"wakeAgent": true}' +else + echo '{"wakeAgent": false}' +fi +``` + +```text +cronjob(action="create", name="nightly-analysis", + schedule="0 9 * * *", + script="flag-ready.sh", + prompt="Run the nightly analysis over today's batch.") +``` + +**SQL-count gate** — only run when there are new rows to process in your own database. The script can also pass the count through to the agent via `context`, so the agent knows how much it's looking at without re-querying. + +```python +#!/usr/bin/env python +# ~/.hermes/scripts/new-rows.py +import json, sqlite3 +conn = sqlite3.connect("/home/me/data/app.db") +n = conn.execute( + "SELECT COUNT(*) FROM messages WHERE ts > strftime('%s','now','-2 hours')" +).fetchone()[0] +if n < 1: + print(json.dumps({"wakeAgent": False})) +else: + print(json.dumps({"wakeAgent": True, "context": {"new_rows": n}})) +``` + +```text +cronjob(action="create", name="summarize-new-msgs", + schedule="every 2h", + script="new-rows.py", + prompt="Summarize the new messages from the last 2 hours.") +``` + +The same pattern works for any data source you can query from a script — Postgres, an HTTP API, your own state store — without baking a SQL evaluator into the cron subsystem. + +:::tip +Hermes's own `~/.hermes/state.db` uses an internal schema that changes between releases. Don't query it from a pre-run gate — point at your own database or feed instead. +::: + +Credit: this recipe set was prompted by @iankar8's exploration in [#2654](https://github.com/NousResearch/hermes-agent/pull/2654), which proposed adding sql/file/command triggers as a parallel mechanism. The `script` + `wakeAgent` gate already covers all three cases at $0, so the work landed as documentation instead.
+ ### Chaining jobs: `context_from` A cron job can consume the most recent successful output of one or more other jobs by listing their names (or IDs) in `context_from`: diff --git a/website/docs/user-guide/features/rl-training.md b/website/docs/user-guide/features/rl-training.md deleted file mode 100644 index 81fc6539b376..000000000000 --- a/website/docs/user-guide/features/rl-training.md +++ /dev/null @@ -1,234 +0,0 @@ ---- -sidebar_position: 13 -title: "RL Training" -description: "Reinforcement learning on agent behaviors with Tinker-Atropos โ€” environment discovery, training, and evaluation" ---- - -# RL Training - -Hermes Agent includes an integrated RL (Reinforcement Learning) training pipeline built on **Tinker-Atropos**. This enables training language models on environment-specific tasks using GRPO (Group Relative Policy Optimization) with LoRA adapters, orchestrated entirely through the agent's tool interface. - -## Overview - -The RL training system consists of three components: - -1. **[Atropos](https://github.com/NousResearch/atropos)** โ€” A trajectory API server that coordinates environment interactions, manages rollout groups, and computes advantages -2. **[Tinker](https://thinkingmachines.ai/tinker/)** โ€” A training service that handles model weights, LoRA training, sampling/inference, and optimizer steps -3. **Environments** โ€” Python classes that define tasks, scoring, and reward functions (e.g., GSM8K math problems) - -The agent can discover environments, configure training parameters, launch training runs, and monitor metrics โ€” all through a set of `rl_*` tools. 
- -## Requirements - -RL training requires: - -- **Python >= 3.11** (Tinker package requirement) -- **TINKER_API_KEY** โ€” API key for the Tinker training service -- **WANDB_API_KEY** โ€” API key for [Weights & Biases](https://wandb.ai/) metrics tracking -- The `tinker-atropos` submodule (at `tinker-atropos/` relative to the Hermes root) - -```bash -# Set up API keys -hermes config set TINKER_API_KEY your-tinker-key -hermes config set WANDB_API_KEY your-wandb-key -``` - -When both keys are present and Python >= 3.11 is available, the `rl` toolset is automatically enabled. - -## Available Tools - -| Tool | Description | -|------|-------------| -| `rl_list_environments` | Discover available RL environments | -| `rl_select_environment` | Select an environment and load its config | -| `rl_get_current_config` | View configurable and locked fields | -| `rl_edit_config` | Modify configurable training parameters | -| `rl_start_training` | Launch a training run (spawns 3 processes) | -| `rl_check_status` | Monitor training progress and WandB metrics | -| `rl_stop_training` | Stop a running training job | -| `rl_get_results` | Get final metrics and model weights path | -| `rl_list_runs` | List all active and completed runs | -| `rl_test_inference` | Quick inference test using OpenRouter | - -## Workflow - -### 1. Discover Environments - -``` -List the available RL environments -``` - -The agent calls `rl_list_environments()` which scans `tinker-atropos/tinker_atropos/environments/` using AST parsing to find Python classes inheriting from `BaseEnv`. Each environment defines: - -- **Dataset loading** โ€” where training data comes from (e.g., HuggingFace datasets) -- **Prompt construction** โ€” how to format items for the model -- **Scoring/verification** โ€” how to evaluate model outputs and assign rewards - -### 2. 
Select and Configure - -``` -Select the GSM8K environment and show me the configuration -``` - -The agent calls `rl_select_environment("gsm8k_tinker")`, then `rl_get_current_config()` to see all parameters. - -Configuration fields are divided into two categories: - -**Configurable fields** (can be modified): -- `group_size` โ€” Number of completions per item (default: 16) -- `batch_size` โ€” Training batch size (default: 128) -- `wandb_name` โ€” WandB run name (auto-set to `{env}-{timestamp}`) -- Other environment-specific parameters - -**Locked fields** (infrastructure settings, cannot be changed): -- `tokenizer_name` โ€” Model tokenizer (e.g., `Qwen/Qwen3-8B`) -- `rollout_server_url` โ€” Atropos API URL (`http://localhost:8000`) -- `max_token_length` โ€” Maximum token length (8192) -- `max_num_workers` โ€” Maximum parallel workers (2048) -- `total_steps` โ€” Total training steps (2500) -- `lora_rank` โ€” LoRA adapter rank (32) -- `learning_rate` โ€” Learning rate (4e-5) -- `max_token_trainer_length` โ€” Max tokens for trainer (9000) - -### 3. Start Training - -``` -Start the training run -``` - -The agent calls `rl_start_training()` which: - -1. Generates a YAML config file merging locked settings with configurable overrides -2. Creates a unique run ID -3. Spawns three processes: - - **Atropos API server** (`run-api`) โ€” trajectory coordination - - **Tinker trainer** (`launch_training.py`) โ€” LoRA training + FastAPI inference server on port 8001 - - **Environment** (`environment.py serve`) โ€” the selected environment connecting to Atropos - -The processes start with staggered delays (5s for API, 30s for trainer, 90s more for environment) to ensure proper initialization order. - -### 4. 
Monitor Progress - -``` -Check the status of training run abc12345 -``` - -The agent calls `rl_check_status(run_id)` which reports: - -- Process status (running/exited for each of the 3 processes) -- Running time -- WandB metrics (step, reward mean, percent correct, eval accuracy) -- Log file locations for debugging - -:::note Rate Limiting -Status checks are rate-limited to once every **30 minutes** per run ID. This prevents excessive polling during long-running training jobs that take hours. -::: - -### 5. Stop or Get Results - -``` -Stop the training run -# or -Get the final results for run abc12345 -``` - -`rl_stop_training()` terminates all three processes in reverse order (environment โ†’ trainer โ†’ API). `rl_get_results()` retrieves final WandB metrics and training history. - -## Inference Testing - -Before committing to a full training run, you can test if an environment works correctly using `rl_test_inference`. This runs a few steps of inference and scoring using OpenRouter โ€” no Tinker API needed, just an `OPENROUTER_API_KEY`. - -``` -Test the selected environment with inference -``` - -Default configuration: -- **3 steps ร— 16 completions = 48 rollouts per model** -- Tests 3 models at different scales for robustness: - - `qwen/qwen3-8b` (small) - - `z-ai/glm-4.7-flash` (medium) - - `minimax/minimax-m2.7` (large) -- Total: ~144 rollouts - -This validates: -- Environment loads correctly -- Prompt construction works -- Inference response parsing is robust across model scales -- Verifier/scoring logic produces valid rewards - -## Tinker API Integration - -The trainer uses the [Tinker](https://tinker.computer) API for model training operations: - -- **ServiceClient** โ€” Creates training and sampling clients -- **Training client** โ€” Handles forward-backward passes with importance sampling loss, optimizer steps (Adam), and weight checkpointing -- **Sampling client** โ€” Provides inference using the latest trained weights - -The training loop: -1. 
Fetches a batch of rollouts from Atropos (prompt + completions + scores) -2. Converts to Tinker Datum objects with padded logprobs and advantages -3. Runs forward-backward pass with importance sampling loss -4. Takes an optimizer step (Adam: lr=4e-5, ฮฒ1=0.9, ฮฒ2=0.95) -5. Saves weights and creates a new sampling client for next-step inference -6. Logs metrics to WandB - -## Architecture Diagram - -```mermaid -flowchart LR - api["Atropos API
run-api
port 8000"] - env["Environment
BaseEnv implementation"] - infer["OpenAI / sglang
inference API
port 8001"] - trainer["Tinker Trainer
LoRA training + FastAPI"] - - env <--> api - env --> infer - api -->|"batches: tokens, scores, logprobs"| trainer - trainer -->|"serves inference"| infer -``` - -## Creating Custom Environments - -To create a new RL environment: - -1. Create a Python file in `tinker-atropos/tinker_atropos/environments/` -2. Define a class that inherits from `BaseEnv` -3. Implement the required methods: - - `load_dataset()` โ€” Load your training data - - `get_next_item()` โ€” Provide the next item to the model - - `score_answer()` โ€” Score model outputs and assign rewards - - `collect_trajectories()` โ€” Collect and return trajectories -4. Optionally define a custom config class inheriting from `BaseEnvConfig` - -Study the existing `gsm8k_tinker.py` as a template. The agent can help you create new environments โ€” it can read existing environment files, inspect HuggingFace datasets, and write new environment code. - -## WandB Metrics - -Training runs log to Weights & Biases with these key metrics: - -| Metric | Description | -|--------|-------------| -| `train/loss` | Training loss (importance sampling) | -| `train/learning_rate` | Current learning rate | -| `reward/mean` | Mean reward across groups | -| `logprobs/mean` | Mean reference logprobs | -| `logprobs/mean_training` | Mean training logprobs | -| `logprobs/diff` | Logprob drift (reference - training) | -| `advantages/mean` | Mean advantage values | -| `advantages/std` | Advantage standard deviation | - -## Log Files - -Each training run generates log files in `~/.hermes/logs/rl_training/`: - -``` -logs/ -โ”œโ”€โ”€ api_{run_id}.log # Atropos API server logs -โ”œโ”€โ”€ trainer_{run_id}.log # Tinker trainer logs -โ”œโ”€โ”€ env_{run_id}.log # Environment process logs -โ””โ”€โ”€ inference_tests/ # Inference test results - โ”œโ”€โ”€ test_{env}_{model}.jsonl - โ””โ”€โ”€ test_{env}_{model}.log -``` - -These are invaluable for debugging when training fails or produces unexpected results. 
diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md index 9499e15d8068..9959bcce112d 100644 --- a/website/docs/user-guide/features/skills.md +++ b/website/docs/user-guide/features/skills.md @@ -351,6 +351,7 @@ Hermes can install directly from GitHub repositories and GitHub-based taps. This Default taps (browsable without any setup): - [openai/skills](https://github.com/openai/skills) - [anthropics/skills](https://github.com/anthropics/skills) +- [huggingface/skills](https://github.com/huggingface/skills) - [VoltAgent/awesome-agent-skills](https://github.com/VoltAgent/awesome-agent-skills) - [garrytan/gstack](https://github.com/garrytan/gstack) @@ -445,7 +446,7 @@ Important behavior: |-------|--------|--------| | `builtin` | Ships with Hermes | Always trusted | | `official` | `optional-skills/` in the repo | Builtin trust, no third-party warning | -| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills` | More permissive policy than community sources | +| `trusted` | Trusted registries/repos such as `openai/skills`, `anthropics/skills`, `huggingface/skills` | More permissive policy than community sources | | `community` | Everything else (`skills.sh`, well-known endpoints, custom GitHub repos, most marketplaces) | Non-dangerous findings can be overridden with `--force`; `dangerous` verdicts stay blocked | ### Update lifecycle diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index a4530148cbfb..50f1641f0933 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -286,6 +286,8 @@ Discord behavior is controlled through two files: **`~/.hermes/.env`** for crede | `DISCORD_IGNORED_CHANNELS` | No | โ€” | Comma-separated channel IDs where the bot **never** responds, even when `@mentioned`. Takes priority over all other channel settings. 
| | `DISCORD_ALLOWED_CHANNELS` | No | โ€” | Comma-separated channel IDs. When set, the bot **only** responds in these channels (plus DMs if allowed). Overrides `config.yaml` `discord.allowed_channels`. Combine with `DISCORD_IGNORED_CHANNELS` to express allow/deny rules. | | `DISCORD_NO_THREAD_CHANNELS` | No | โ€” | Comma-separated channel IDs where the bot responds directly in the channel instead of creating a thread. Only relevant when `DISCORD_AUTO_THREAD` is `true`. | +| `DISCORD_HISTORY_BACKFILL` | No | `true` | When `true`, prepend recent channel scrollback (since the bot's last response) to the user message when the bot is mentioned. Recovers context the bot would otherwise miss with `require_mention`. Skipped in DMs and free-response channels. Set to `false` to disable. | +| `DISCORD_HISTORY_BACKFILL_LIMIT` | No | `50` | Maximum number of messages to scan backwards when assembling the backfill block. In practice the scan usually stops earlier โ€” at the bot's own last message in the channel. | | `DISCORD_REPLY_TO_MODE` | No | `"first"` | Controls reply-reference behavior: `"off"` โ€” never reply to the original message, `"first"` โ€” reply-reference on the first message chunk only (default), `"all"` โ€” reply-reference on every chunk. | | `DISCORD_ALLOW_MENTION_EVERYONE` | No | `false` | When `false` (default), the bot cannot ping `@everyone` or `@here` even if its response contains those tokens. Set to `true` to opt back in. See [Mention Control](#mention-control) below. | | `DISCORD_ALLOW_MENTION_ROLES` | No | `false` | When `false` (default), the bot cannot ping `@role` mentions. Set to `true` to allow. 
| @@ -309,6 +311,8 @@ discord: reactions: true # Add emoji reactions during processing ignored_channels: [] # Channel IDs where bot never responds no_thread_channels: [] # Channel IDs where bot responds without threading + history_backfill: true # Prepend recent channel scrollback on mention (default: true) + history_backfill_limit: 50 # Max messages to scan backwards (default: 50) channel_prompts: {} # Per-channel ephemeral system prompts allow_mentions: # What the bot is allowed to ping (safe defaults) everyone: false # @everyone / @here pings (default: false) @@ -437,6 +441,47 @@ Behavior: - If a message arrives inside a thread or forum post and that thread has no explicit entry, Hermes falls back to the parent channel/forum ID. - Prompts are applied ephemerally at runtime, so changing them affects future turns immediately without rewriting past session history. +#### `discord.history_backfill` + +**Type:** boolean โ€” **Default:** `true` + +When enabled, the bot recovers missed channel messages on each `@mention`. With `require_mention: true`, the bot only processes messages that tag it directly โ€” everything else in the channel is invisible to the session transcript. History backfill scans backwards through recent channel history when triggered, collecting messages between the bot's last response and the current mention, and includes them as context. + +Behavior by surface: + +- **Server channels** (with `require_mention: true`): backfill scans the channel since the bot's last response. Useful when other participants posted while the bot wasn't addressed. +- **Threads**: backfill scans the thread only โ€” Discord's `channel.history()` on a thread returns only that thread's messages, not the parent channel. This is the right scope because threads are usually self-contained conversations. +- **DMs**: skipped. Every DM message triggers the bot, so the session transcript is already complete โ€” there's no mention gap to fill. 
+- **Free-response channels** and **bot's own auto-created threads**: skipped for the same reason โ€” no mention gating means no gap. + +Per-user sessions (`group_sessions_per_user: true`, the default) also benefit: a user's session is missing the context posted by other channel participants and the user's own messages from before they tagged the bot. Backfill fills both gaps. + +```yaml +discord: + history_backfill: true # default +``` + +To turn it off: + +```yaml +discord: + history_backfill: false +``` + +> **Note:** Messages that arrive *while* the bot is processing (between a trigger and its response) are not captured. This is an accepted simplification โ€” the user can re-send or tag again. + +#### `discord.history_backfill_limit` + +**Type:** integer โ€” **Default:** `50` + +Maximum number of messages to scan backwards when recovering channel context. In practice the scan usually stops much earlier โ€” at the bot's own last message in the channel, which is the natural boundary between turns. This limit is a safety cap for cold starts and long gaps where no prior bot message exists in recent history. + +```yaml +discord: + history_backfill: true + history_backfill_limit: 50 +``` + #### `group_sessions_per_user` **Type:** boolean โ€” **Default:** `true` diff --git a/website/docs/user-guide/messaging/simplex.md b/website/docs/user-guide/messaging/simplex.md new file mode 100644 index 000000000000..60853acd9f84 --- /dev/null +++ b/website/docs/user-guide/messaging/simplex.md @@ -0,0 +1,99 @@ +# SimpleX Chat + +[SimpleX Chat](https://simplex.chat/) is a private, decentralised messaging platform where users own their contacts and groups. Unlike other platforms, SimpleX assigns no persistent user IDs โ€” every contact is identified by an opaque internal ID generated at connection time, which makes it one of the most private messengers available. 
+ +## Prerequisites + +- The **simplex-chat** CLI installed and running as a daemon +- Python package **websockets** (`pip install websockets`) + +## Install simplex-chat + +Download the latest release from the [simplex-chat GitHub releases](https://github.com/simplex-chat/simplex-chat/releases) page, or via Docker: + +```bash +# Linux x86-64 binary (Ubuntu 22.04 build; pick the matching release asset for macOS or other platforms) +curl -L https://github.com/simplex-chat/simplex-chat/releases/latest/download/simplex-chat-ubuntu-22_04-x86-64 -o simplex-chat +chmod +x simplex-chat + +# Or Docker +docker run -p 5225:5225 simplexchat/simplex-chat -p 5225 +``` + +## Start the daemon + +```bash +simplex-chat -p 5225 +``` + +The daemon listens on WebSocket at `ws://127.0.0.1:5225` by default. + +## Configure Hermes + +### Via setup wizard + +```bash +hermes setup gateway +``` + +Select **SimpleX Chat** and follow the prompts. + +### Via environment variables + +Add these to `~/.hermes/.env`: + +``` +SIMPLEX_WS_URL=ws://127.0.0.1:5225 +SIMPLEX_ALLOWED_USERS=<contact_id_1>,<contact_id_2> +SIMPLEX_HOME_CHANNEL=<contact_id> +``` + +| Variable | Required | Description | +|---|---|---| +| `SIMPLEX_WS_URL` | Yes | WebSocket URL of the simplex-chat daemon | +| `SIMPLEX_ALLOWED_USERS` | Recommended | Comma-separated contact IDs allowed to use the agent | +| `SIMPLEX_ALLOW_ALL_USERS` | Optional | Set `true` to allow every contact (use carefully) | +| `SIMPLEX_HOME_CHANNEL` | Optional | Default contact ID for cron job delivery | +| `SIMPLEX_HOME_CHANNEL_NAME` | Optional | Human label for the home channel | + +## Find your contact ID + +After starting the daemon, open a conversation with your agent contact. The contact ID will appear in session logs or via `hermes send_message action=list`. + +## Authorization + +By default **all contacts are denied**. You must either: + +1. Set `SIMPLEX_ALLOWED_USERS` to a comma-separated list of contact IDs, or +2. Use **DM pairing** — send any message to the bot and it will reply with a pairing code. Enter that code via `hermes gateway pair`.
+ +## Using SimpleX with cron jobs + +```python +cronjob( + action="create", + schedule="every 1h", + deliver="simplex", # uses SIMPLEX_HOME_CHANNEL + prompt="Check for alerts and summarise." +) +``` + +Or target a specific contact: + +```python +send_message(target="simplex:<contact_id>", message="Done!") +``` + +## Privacy notes + +- SimpleX never reveals phone numbers or email addresses — contacts use opaque IDs +- The connection between Hermes and the daemon is a local WebSocket (`ws://127.0.0.1:5225`) — traffic on this hop never leaves your machine +- Messages are end-to-end encrypted by the SimpleX protocol before reaching the daemon + +## Troubleshooting + +**"Cannot reach daemon"** — Ensure `simplex-chat -p 5225` is running and the port matches `SIMPLEX_WS_URL`. + +**"websockets not installed"** — Run `pip install websockets`. + +**Messages not received** — Check that the contact's ID is in `SIMPLEX_ALLOWED_USERS` or approve them via DM pairing. diff --git a/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md b/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md deleted file mode 100644 index 7cce92a7e0e8..000000000000 --- a/website/docs/user-guide/skills/optional/mlops/mlops-hermes-atropos-environments.md +++ /dev/null @@ -1,323 +0,0 @@ ---- -title: "Hermes Atropos Environments — Build, test, and debug Hermes Agent RL environments for Atropos training" -sidebar_label: "Hermes Atropos Environments" -description: "Build, test, and debug Hermes Agent RL environments for Atropos training" ---- - -{/* This page is auto-generated from the skill's SKILL.md by website/scripts/generate-skill-docs.py. Edit the source SKILL.md, not this page. */} - -# Hermes Atropos Environments - -Build, test, and debug Hermes Agent RL environments for Atropos training. Covers the HermesAgentBaseEnv interface, reward functions, agent loop integration, evaluation with tools, wandb logging, and the three CLI modes (serve/process/evaluate).
Use when creating, reviewing, or fixing RL environments in the hermes-agent repo. - -## Skill metadata - -| | | -|---|---| -| Source | Optional — install with `hermes skills install official/mlops/hermes-atropos-environments` | -| Path | `optional-skills/mlops/hermes-atropos-environments` | -| Version | `1.1.0` | -| Author | Hermes Agent | -| License | MIT | -| Platforms | linux, macos, windows | -| Tags | `atropos`, `rl`, `environments`, `training`, `reinforcement-learning`, `reward-functions` | -| Related skills | [`axolotl`](/docs/user-guide/skills/optional/mlops/mlops-training-axolotl), [`fine-tuning-with-trl`](/docs/user-guide/skills/optional/mlops/mlops-training-trl-fine-tuning), `lm-evaluation-harness` | - -## Reference: full SKILL.md - -:::info -The following is the complete skill definition that Hermes loads when this skill is triggered. This is what the agent sees as instructions when the skill is active. -::: - -# Hermes Agent Atropos Environments - -Guide for building RL environments in the hermes-agent repo that integrate with the Atropos training framework. - -## Architecture Overview - - -``` -Atropos BaseEnv (atroposlib/envs/base.py) - └── HermesAgentBaseEnv (environments/hermes_base_env.py) - ├── Handles agent loop orchestration - ├── Handles tool resolution per group - ├── Handles ToolContext for reward verification - └── YOUR ENVIRONMENT (environments/your_env.py) - Only implements: setup, get_next_item, format_prompt, - compute_reward, evaluate, wandb_log -``` - - -Hermes environments are special because they run a **multi-turn agent loop with tool calling** — not just single-turn completions. The base env handles the loop; you implement the task and scoring.
- -## File Locations - -| File | Purpose | -|------|---------| -| `environments/hermes_base_env.py` | Base class with agent loop + tool resolution | -| `environments/agent_loop.py` | `HermesAgentLoop` + `AgentResult` dataclass | -| `environments/tool_context.py` | `ToolContext` for reward verification | -| `environments/tool_call_parsers.py` | Phase 2 tool call parsers (hermes, mistral, etc.) | -| `environments/your_env.py` | Your environment implementation | - -## Inference Setup — Ask the User First - -**IMPORTANT:** Before running any test, evaluation, or data generation command, always ask the user how they want to handle inference. Do NOT assume OpenRouter or any specific endpoint. Present these options: - -1. **OpenRouter** — Ask which model they want to use (e.g., `anthropic/claude-sonnet-4.5`, `google/gemini-2.5-pro`, `meta-llama/llama-3.3-70b-instruct`, etc.). Requires `OPENROUTER_API_KEY` in environment. -2. **Self-hosted VLLM endpoint** — Ask for their base URL (e.g., `http://localhost:8000/v1`) and model name. Set `--openai.server_type vllm`. -3. **Other OpenAI-compatible API** — Ask for the base URL, model name, and any required API key. Set `--openai.server_type openai` and `--openai.health_check false`. -4. **Local Atropos training server** — For `serve` mode with a live training loop. Default `http://localhost:8000/v1`. - -Once the user tells you their setup, use those values in all CLI commands for that session. Example prompts: - -> "Before I run this, how would you like to handle inference? -> 1. OpenRouter (I'll need your preferred model, e.g. claude-sonnet-4.5) -> 2. A self-hosted VLLM endpoint (give me the URL and model name) -> 3. Another OpenAI-compatible API (give me the URL, model, and any auth details) -> 4.
Local Atropos training server (serve mode)" - -### Key flags by provider: - -| Provider | `--openai.server_type` | `--openai.health_check` | `--openai.api_key` | -|----------|----------------------|------------------------|-------------------| -| OpenRouter | `openai` | `false` | `$OPENROUTER_API_KEY` | -| VLLM (self-hosted) | `vllm` | (default) | (not needed) | -| Other OpenAI-compatible | `openai` | `false` | As needed | -| Local Atropos | (default) | (default) | (not needed) | - -## Required Methods - -### 1. `setup()` — Load dataset and initialize state - -```python -async def setup(self) -> None: - """Called once at startup. Load datasets, initialize state.""" - # Try HuggingFace first, fallback to built-in samples - try: - from datasets import load_dataset - ds = load_dataset("your/dataset", split="test") - self._items = [...] - except Exception: - self._items = BUILTIN_SAMPLES - - # Always split into train/eval - random.shuffle(self._items) - eval_size = max(20, int(len(self._items) * 0.1)) - self._eval_items = self._items[:eval_size] - self._items = self._items[eval_size:] -``` - -### 2. `get_next_item()` — Return next training item - -```python -async def get_next_item(self) -> dict: - """Return next item, cycling through dataset.""" - item = self._items[self._index % len(self._items)] - self._index += 1 - return item -``` - -### 3. `format_prompt(item)` — Convert item to user message - -```python -def format_prompt(self, item: dict) -> str: - """Convert a dataset item into the user-facing prompt.""" - return f"Research this question: {item['question']}" -``` - -### 4. `compute_reward(item, result, ctx)` — Score the rollout - -**CRITICAL**: `result` is an `AgentResult`, NOT a dict.
It has these attributes: -- `result.messages` — List of message dicts (OpenAI format) -- `result.turns_used` — Number of LLM calls made -- `result.finished_naturally` — True if model stopped voluntarily -- `result.tool_errors` — List of ToolError objects - -**AgentResult does NOT have**: `final_response`, `tool_calls`, `tools_used`. -You must extract these from `result.messages`: - -```python -async def compute_reward(self, item, result: AgentResult, ctx: ToolContext) -> float: - # Extract final response (last assistant message with content) - final_response = "" - tools_used = [] - for msg in reversed(result.messages): - if msg.get("role") == "assistant" and msg.get("content") and not final_response: - final_response = msg["content"] - if msg.get("role") == "assistant" and msg.get("tool_calls"): - for tc in msg["tool_calls"]: - fn = tc.get("function", {}) if isinstance(tc, dict) else {} - name = fn.get("name", "") - if name: - tools_used.append(name) - - # Score using LLM judge, heuristic, or ToolContext verification - correctness = await self._llm_judge(item, final_response) - return correctness -``` - -`ctx` (ToolContext) gives you terminal/file access to the agent's sandbox for verification: -```python -# Run tests in the agent's sandbox -result = ctx.terminal("pytest /workspace/test.py") -return 1.0 if result["exit_code"] == 0 else 0.0 -``` - -### 5. `evaluate()` — Periodic evaluation with full agent loop - -**MUST use the full agent loop with tools**, not single-turn chat_completion.
-The whole point of hermes-agent environments is agentic evaluation: - -```python -async def evaluate(self, *args, **kwargs) -> None: - import time, uuid - from environments.agent_loop import HermesAgentLoop - from environments.tool_context import ToolContext - - start_time = time.time() - tools, valid_names = self._resolve_tools_for_group() - samples = [] - - for item in self._eval_items[:self.config.eval_size]: - task_id = str(uuid.uuid4()) - messages = [] - if self.config.system_prompt: - messages.append({"role": "system", "content": self.config.system_prompt}) - messages.append({"role": "user", "content": self.format_prompt(item)}) - - agent = HermesAgentLoop( - server=self.server, - tool_schemas=tools, - valid_tool_names=valid_names, - max_turns=self.config.max_agent_turns, - task_id=task_id, - temperature=0.0, # Deterministic for eval - max_tokens=self.config.max_token_length, - extra_body=self.config.extra_body, - ) - result = await agent.run(messages) - - ctx = ToolContext(task_id) - try: - reward = await self.compute_reward(item, result, ctx) - finally: - ctx.cleanup() - - samples.append({"prompt": ..., "response": ..., "reward": reward}) - - eval_metrics = {"eval/mean_reward": ...} - await self.evaluate_log(metrics=eval_metrics, samples=samples, - start_time=start_time, end_time=time.time()) -``` - -### 6. `wandb_log()` — Custom metrics logging - -Always call `super().wandb_log()` at the end: - -```python -async def wandb_log(self, wandb_metrics=None): - if wandb_metrics is None: - wandb_metrics = {} - if self._reward_buffer: - n = len(self._reward_buffer) - wandb_metrics["train/mean_reward"] = sum(self._reward_buffer) / n - self._reward_buffer.clear() - await super().wandb_log(wandb_metrics) # MUST call super -``` - -**Pitfall**: `compute_reward` appends to metric buffers. During eval, this pollutes training metrics. Roll back buffer entries added during eval. - -## Config Class - -Always create a custom config subclass with Pydantic Field descriptors.
Key inherited fields you can tune: `enabled_toolsets`, `max_agent_turns`, `agent_temperature`, `system_prompt`, `terminal_backend`, `group_size`, `steps_per_eval`, `total_steps`. - -## config_init() — Default Configuration - -Classmethod returning `(YourEnvConfig, [APIServerConfig(...)])`. Set server_type to "openai" for OpenRouter/external APIs. Load API key from environment variable. - -## Three CLI Modes - -```bash -# SERVE — Full training loop (connects to Atropos API server) -python environments/my_env.py serve --openai.base_url http://localhost:8000/v1 - -# PROCESS — Offline data generation (saves JSONL) -python environments/my_env.py process --env.total_steps 10 --env.group_size 1 \ - --env.use_wandb false --env.data_path_to_save_groups output.jsonl \ - --openai.base_url "<BASE_URL>" \ - --openai.model_name "<MODEL_NAME>" \ - --openai.server_type <SERVER_TYPE> --openai.health_check false - -# EVALUATE — Standalone eval (runs setup + evaluate only) -python environments/my_env.py evaluate --env.eval_size 20 \ - --env.data_dir_to_save_evals /tmp/eval_results \ - --openai.base_url "<BASE_URL>" \ - --openai.model_name "<MODEL_NAME>" \ - --openai.server_type <SERVER_TYPE> --openai.health_check false -``` - -Config priority: CLI args > YAML file > config_init() defaults. - -## Common Pitfalls - -1. **AgentResult has .messages, not .final_response** — Extract the final response by iterating reversed(result.messages) looking for the last assistant message with content. - -2. **evaluate() must use HermesAgentLoop, not chat_completion** — Single-turn chat_completion has no tools. The whole point of hermes-agent benchmarks is agentic evaluation with tool use. - -3. **Don't call _llm_judge twice** — If compute_reward already calls it, extract the score from the buffer instead of calling judge separately in evaluate(). - -4. **Eval pollutes training buffers** — compute_reward appends to metric buffers. During eval, roll back buffer entries to keep training metrics clean. - -5.
**Always set health_check=false for OpenRouter** — OpenRouter has no /health endpoint. - -6. **Set data_dir_to_save_evals in evaluate mode** — Without it, results aren't saved. - -7. **default_toolsets class variable vs enabled_toolsets config** — The class variable is a hint; the config field is what actually controls tool resolution. - -8. **Tool call parsing in messages** — Tool calls are dicts with `{"function": {"name": ..., "arguments": ...}}`. Always check `isinstance(tc, dict)`. - -9. **ToolContext.cleanup()** — Always call in a finally block to release sandbox resources. - -10. **server_type must be "openai" for external APIs** — Without it, Atropos assumes a local VLLM server. - -11. **Always ask the user for their inference setup** — Never hardcode or assume a specific provider/model. See the "Inference Setup" section above. - -## Reward Function Patterns - -### LLM Judge (for open-ended tasks) -Use `self.server.chat_completion()` with a scoring prompt. Parse JSON response for score float. Always include a heuristic fallback (keyword overlap) for when the judge call fails. - -### Binary Verification (for code/terminal tasks) -Use `ctx.terminal("pytest test.py -q")` to run tests in the agent's sandbox. Return 1.0 for pass, 0.0 for fail. - -### Multi-Signal (combine multiple indicators) -Weight correctness (0.6) + tool usage (0.2) + efficiency (0.2) + optional bonuses. Clamp to [0, 1]. - -## Testing Your Environment - -1. **Import test**: `python -c "from environments.my_env import MyEnv; print('OK')"` -2. **Ask the user for inference setup** (see "Inference Setup" section above) -3. **Process mode** (1 item): Verify JSONL output has valid tokens, masks, scores -4. **Evaluate mode**: Verify full agent loop runs with tools, metrics logged correctly -5.
**Check reward range**: Scores should be in [0, 1], not all identical - -## Minimum Implementation Checklist - -```python -class MyEnv(HermesAgentBaseEnv): - name = "my-env" - env_config_cls = MyEnvConfig - - @classmethod - def config_init(cls): ... # Default server + env config - async def setup(self): ... # Load dataset + train/eval split - async def get_next_item(self): ... # Cycle through training items - def format_prompt(self, item): ... # Item → user message string - async def compute_reward(self, item, result, ctx): ... # Score rollout - async def evaluate(self, *args, **kwargs): ... # Full agent loop eval - async def wandb_log(self, metrics=None): ... # Custom metrics + super() - -if __name__ == "__main__": - MyEnv.cli() -``` diff --git a/website/sidebars.ts b/website/sidebars.ts index 37557df8d118..a8d893d6e72c 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -103,7 +103,6 @@ const sidebars: SidebarsConfig = { type: 'category', label: 'Advanced', items: [ - 'user-guide/features/rl-training', 'user-guide/features/spotify', ], }, @@ -146,6 +145,7 @@ const sidebars: SidebarsConfig = { 'user-guide/messaging/teams-meetings', 'user-guide/messaging/msgraph-webhook', 'user-guide/messaging/line', + 'user-guide/messaging/simplex', 'user-guide/messaging/open-webui', 'user-guide/messaging/webhooks', ], @@ -238,7 +238,6 @@ const sidebars: SidebarsConfig = { 'developer-guide/tools-runtime', 'developer-guide/acp-internals', 'developer-guide/cron-internals', - 'developer-guide/environments', 'developer-guide/trajectory-format', ], },