diff --git a/.github/workflows/publish-components.yml b/.github/workflows/publish-components.yml index 8a6e99398..86090b048 100644 --- a/.github/workflows/publish-components.yml +++ b/.github/workflows/publish-components.yml @@ -22,6 +22,7 @@ on: - egress - controller - task-executor + - image-committer default: 'execd' image_tag: description: 'Docker image tag' @@ -35,6 +36,7 @@ on: - 'docker/egress/**' - 'k8s/controller/**' - 'k8s/task-executor/**' + - 'k8s/image-committer/**' jobs: publish: @@ -117,6 +119,8 @@ jobs: cd kubernetes elif [ "$COMPONENT" == "task-executor" ]; then cd kubernetes + elif [ "$COMPONENT" == "image-committer" ]; then + cd kubernetes else cd sandboxes/$COMPONENT fi diff --git a/.github/workflows/publish-js-sdks.yml b/.github/workflows/publish-js-sdks.yml index d051b0a1a..bfd3173b5 100644 --- a/.github/workflows/publish-js-sdks.yml +++ b/.github/workflows/publish-js-sdks.yml @@ -42,14 +42,13 @@ jobs: - name: Set up pnpm uses: pnpm/action-setup@v4 with: - version: latest - - - name: Enable corepack - run: corepack enable + version: 9.15.0 + run_install: false - name: Get pnpm store path id: pnpm-store - run: echo "STORE_PATH=$(corepack pnpm store path)" >> "$GITHUB_OUTPUT" + working-directory: sdks + run: echo "STORE_PATH=$(pnpm store path)" >> "$GITHUB_OUTPUT" - name: Cache pnpm store uses: actions/cache@v5 @@ -60,11 +59,11 @@ jobs: - name: Install workspace dependencies working-directory: sdks - run: corepack pnpm install --frozen-lockfile + run: pnpm install --frozen-lockfile - name: Build SDK working-directory: sdks - run: corepack pnpm --filter ${{ matrix.sdk.packageName }}... --sort run build + run: pnpm --filter ${{ matrix.sdk.packageName }}... 
--sort run build - name: Pack SDK if: startsWith(github.ref, format('refs/tags/js/{0}/v', matrix.sdk.tagPrefix)) @@ -74,7 +73,7 @@ jobs: set -euo pipefail PACK_DIR="${GITHUB_WORKSPACE}/dist/npm/${{ matrix.sdk.name }}" mkdir -p "$PACK_DIR" - corepack pnpm pack --pack-destination "$PACK_DIR" + pnpm pack --pack-destination "$PACK_DIR" PACKAGE_TARBALL="$(find "$PACK_DIR" -maxdepth 1 -name '*.tgz' -print -quit)" if [[ -z "$PACKAGE_TARBALL" ]]; then echo "No package tarball was produced in $PACK_DIR" >&2 @@ -93,4 +92,4 @@ jobs: env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} run: | - corepack pnpm publish "${{ steps.pack.outputs.tarball }}" --access public --no-git-checks + pnpm publish "${{ steps.pack.outputs.tarball }}" --access public --no-git-checks diff --git a/README.md b/README.md index 8381e37aa..84f59a3e7 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Quick start: osb config init osb config set connection.domain localhost:8080 osb config set connection.protocol http +osb config set connection.api_key osb sandbox create --image python:3.12 --timeout 30m -o json osb command run -o raw -- python -c "print(1 + 1)" ``` diff --git a/cli/README.md b/cli/README.md index 928d1523a..28647264f 100644 --- a/cli/README.md +++ b/cli/README.md @@ -50,6 +50,7 @@ opensandbox-server osb config init osb config set connection.domain localhost:8080 osb config set connection.protocol http +osb config set connection.api_key osb config show -o json ``` diff --git a/cli/uv.lock b/cli/uv.lock index f957c991f..be0135be6 100644 --- a/cli/uv.lock +++ b/cli/uv.lock @@ -233,11 +233,11 @@ wheels = [ [[package]] name = "idna" -version = "3.11" +version = "3.15" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = 
"2025-10-12T14:55:20.501Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/77/7b3966d0b9d1d31a36ddf1746926a11dface89a83409bf1483f0237aa758/idna-3.15.tar.gz", hash = "sha256:ca962446ea538f7092a95e057da437618e886f4d349216d2b1e294abfdb65fdc", size = 199245, upload-time = "2026-05-12T22:45:57.011Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, + { url = "https://files.pythonhosted.org/packages/d2/23/408243171aa9aaba178d3e2559159c24c1171a641aa83b67bdd3394ead8e/idna-3.15-py3-none-any.whl", hash = "sha256:048adeaf8c2d788c40fee287673ccaa74c24ffd8dcf09ffa555a2fbb59f10ac8", size = 72340, upload-time = "2026-05-12T22:45:55.733Z" }, ] [[package]] diff --git a/components/egress/docs/mitmproxy-transparent.md b/components/egress/docs/mitmproxy-transparent.md index 1253a1745..3df101527 100644 --- a/components/egress/docs/mitmproxy-transparent.md +++ b/components/egress/docs/mitmproxy-transparent.md @@ -30,8 +30,8 @@ By default, mitmproxy listens on `18081` and transparent redirect rules are set # Optional: change listening port (default: 18081) export OPENSANDBOX_EGRESS_MITMPROXY_PORT=18081 -# Optional: enable mitm addon script (e.g., inject request headers) -export OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT=/opt/opensandbox/mitmscripts/add_header.py +# Optional: load an additional user-defined mitm addon (loaded after the system addon) +export OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT=/path/to/your/addon.py # Optional: bypass decryption for selected domains (semicolon-separated regex list) export OPENSANDBOX_EGRESS_MITMPROXY_IGNORE_HOSTS='.*\.log\.aliyuncs\.com;.*\.example\.internal' @@ -43,7 +43,7 @@ export OPENSANDBOX_EGRESS_MITMPROXY_IGNORE_HOSTS='.*\.log\.aliyuncs\.com;.*\.exa |------|----------|------|--------| | 
`OPENSANDBOX_EGRESS_MITMPROXY_TRANSPARENT` | Yes | Enable transparent mitmproxy (`1/true/on`, etc.) | Disabled | | `OPENSANDBOX_EGRESS_MITMPROXY_PORT` | No | mitmdump listen port; `iptables` redirects `80/443` here | `18081` | -| `OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT` | No | mitm addon script path (`-s`) | Empty | +| `OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT` | No | Additional user mitm addon script path (`-s`); loaded after the system addon | Empty | | `OPENSANDBOX_EGRESS_MITMPROXY_IGNORE_HOSTS` | No | Host/IP regex list for TLS pass-through (`;` separated) | Empty | | `OPENSANDBOX_EGRESS_MITMPROXY_CONFDIR` | No | mitm config and CA directory (passed as `--set confdir=`, also used as `HOME`) | Default directory under `/var/lib/mitmproxy` | | `OPENSANDBOX_EGRESS_MITMPROXY_UPSTREAM_TRUST_DIR` | No | Trust directory for upstream TLS verification (OpenSSL style) | `/etc/ssl/certs` | @@ -62,23 +62,31 @@ Notes: export OPENSANDBOX_EGRESS_MITMPROXY_TRANSPARENT=true ``` -### 2) Enable with Header Injection +### 2) System Addon (Always On) + +The bundled system addon at `/var/egress/mitmscripts/system.py` is shipped in the egress image and loaded automatically whenever transparent mode is enabled. It stays wire-transparent (no headers added or altered) and currently provides: + +- Forces streaming (`flow.response.stream = True`) for SSE (`text/event-stream`) and chunked responses, so each chunk is forwarded immediately instead of being buffered up to the `stream_large_bodies=1m` threshold (critical for LLM streaming UX). + +The system addon is always loaded and cannot be disabled via configuration. To override its behavior, supply a user addon via `OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT`; user addons are loaded after the system addon and may observe or override its hooks. 
+ +### 3) Add a User Addon Alongside the System Addon ```bash export OPENSANDBOX_EGRESS_MITMPROXY_TRANSPARENT=true -export OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT=/opt/opensandbox/mitmscripts/add_header.py +export OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT=/path/to/your/addon.py ``` -Built-in example script: `/opt/opensandbox/mitmscripts/add_header.py` (adds `X-OpenSandbox-Egress: 1`). +The user addon is loaded after the system addon (`-s system.py -s user.py`), so user hooks observe and may override system behavior. -### 3) Bypass Decryption for Specific Domains (e.g. log upload) +### 4) Bypass Decryption for Specific Domains (e.g. log upload) ```bash export OPENSANDBOX_EGRESS_MITMPROXY_TRANSPARENT=true export OPENSANDBOX_EGRESS_MITMPROXY_IGNORE_HOSTS='.*\.log\.aliyuncs\.com' ``` -### 4) Use a Fixed CA (consistent fingerprint across replicas) +### 5) Use a Fixed CA (consistent fingerprint across replicas) If CA files already exist in `confdir`, mitmproxy reuses them instead of regenerating on each startup. Typical paths: diff --git a/components/egress/mitmscripts/add_header.py b/components/egress/mitmscripts/add_header.py deleted file mode 100644 index c3a3430b2..000000000 --- a/components/egress/mitmscripts/add_header.py +++ /dev/null @@ -1,12 +0,0 @@ -# Example mitmproxy addon: add a static header to every request. -# Use: OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT=/opt/opensandbox/mitmscripts/add_header.py -# Optional addon: OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT can point to this file. -from mitmproxy import http - -HEADER_NAME = "X-OpenSandbox-Egress" -HEADER_VALUE = "1" - - -def request(flow: http.HTTPFlow) -> None: - if flow.request: - flow.request.headers[HEADER_NAME] = HEADER_VALUE diff --git a/components/egress/mitmscripts/system.py b/components/egress/mitmscripts/system.py new file mode 100644 index 000000000..71a988896 --- /dev/null +++ b/components/egress/mitmscripts/system.py @@ -0,0 +1,36 @@ +# Copyright 2026 Alibaba Group Holding Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# OpenSandbox egress system addon. +# +# Always loaded by the egress mitmproxy launcher. Stays transparent on the +# wire (does not add or alter headers that would reveal the proxy to peers). +# +# Behavior: +# Forces streaming for SSE / chunked responses so each chunk is forwarded +# immediately, bypassing the stream_large_bodies=1m buffer set in launch.go +# (which otherwise stalls LLM-style small-chunk streams). +# +# User-defined addons can be loaded alongside this script via +# OPENSANDBOX_EGRESS_MITMPROXY_SCRIPT. +from mitmproxy import http + + +def responseheaders(flow: http.HTTPFlow) -> None: + if flow.response is None: + return + content_type = flow.response.headers.get("content-type", "").lower() + transfer_encoding = flow.response.headers.get("transfer-encoding", "").lower() + if "text/event-stream" in content_type or "chunked" in transfer_encoding: + flow.response.stream = True diff --git a/components/egress/pkg/mitmproxy/launch.go b/components/egress/pkg/mitmproxy/launch.go index 0065d672e..2a1b4e205 100644 --- a/components/egress/pkg/mitmproxy/launch.go +++ b/components/egress/pkg/mitmproxy/launch.go @@ -34,11 +34,16 @@ const RunAsUser = "mitmproxy" // Loopback: transparent mode receives via REDIRECT; do not listen on 0.0.0.0 in the netns. 
const listenHostLoopback = "127.0.0.1" +// systemScriptPath: bundled system addon shipped via the egress Dockerfile +// (COPY components/egress/mitmscripts /var/egress/mitmscripts). Always loaded. +const systemScriptPath = "/var/egress/mitmscripts/system.py" + // Config: mitmdump --mode transparent; UserName must match iptables ! --uid-owner, ConfDir is mitm state/CA. type Config struct { ListenPort int UserName string ConfDir string + // ScriptPath is an optional user-supplied addon, loaded after the system addon. ScriptPath string // OnExit is called (if non-nil) when mitmdump exits. Called from a background goroutine. OnExit func(error) @@ -120,8 +125,10 @@ func Launch(cfg Config) (*Running, error) { args = append(args, "--set", "confdir="+cd) homeEnv = cd } - if strings.TrimSpace(cfg.ScriptPath) != "" { - args = append(args, "-s", strings.TrimSpace(cfg.ScriptPath)) + // Load the system addon first so user addons can observe / override its hooks. + args = append(args, "-s", systemScriptPath) + if user := strings.TrimSpace(cfg.ScriptPath); user != "" { + args = append(args, "-s", user) } // Upstream passthrough: each pattern becomes --set ignore_hosts= (regex; IP ranges are practical in transparent mode). 
diff --git a/components/egress/policy_server.go b/components/egress/policy_server.go index a62eefc5a..8d85746b8 100644 --- a/components/egress/policy_server.go +++ b/components/egress/policy_server.go @@ -147,8 +147,10 @@ func (s *policyServer) handlePolicy(w http.ResponseWriter, r *http.Request) { s.handlePost(w, r) case http.MethodPatch: s.handlePatch(w, r) + case http.MethodDelete: + s.handleDelete(w, r) default: - w.Header().Set("Allow", "GET, POST, PUT, PATCH") + w.Header().Set("Allow", "GET, POST, PUT, PATCH, DELETE") http.Error(w, "method not allowed", http.StatusMethodNotAllowed) } } @@ -222,15 +224,16 @@ func (s *policyServer) handlePatch(w http.ResponseWriter, r *http.Request) { defer s.mu.Unlock() raw, err := readPolicyRequestBody(r) - if err != nil || raw == "" { - if err != nil { - logEgressUpdateFailedWarn(fmt.Sprintf("failed to read body: %v", err)) - } else { - logEgressUpdateFailedWarn("empty patch body") - } + if err != nil { + logEgressUpdateFailedWarn(fmt.Sprintf("failed to read body: %v", err)) http.Error(w, fmt.Sprintf("failed to read body: %v", err), http.StatusBadRequest) return } + if raw == "" { + logEgressUpdateFailedWarn("empty patch body") + http.Error(w, "empty body", http.StatusBadRequest) + return + } var patchRules []policy.EgressRule if err := json.Unmarshal([]byte(raw), &patchRules); err != nil { @@ -268,6 +271,84 @@ func (s *policyServer) handlePatch(w http.ResponseWriter, r *http.Request) { }) } +func (s *policyServer) handleDelete(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + s.mu.Lock() + defer s.mu.Unlock() + + raw, err := readPolicyRequestBody(r) + if err != nil { + logEgressUpdateFailedWarn(fmt.Sprintf("failed to read body: %v", err)) + http.Error(w, fmt.Sprintf("failed to read body: %v", err), http.StatusBadRequest) + return + } + if raw == "" { + logEgressUpdateFailedWarn("empty delete body") + http.Error(w, "empty body", http.StatusBadRequest) + return + } + + var targets []string + if err := 
json.Unmarshal([]byte(raw), &targets); err != nil { + logEgressUpdateFailedWarn(fmt.Sprintf("invalid delete targets: %v", err)) + http.Error(w, fmt.Sprintf("invalid delete targets: %v", err), http.StatusBadRequest) + return + } + if len(targets) == 0 { + logEgressUpdateFailedWarn("empty delete targets array") + http.Error(w, "invalid delete targets: empty array", http.StatusBadRequest) + return + } + + base := s.proxy.CurrentPolicy() + if base == nil { + base = policy.DefaultDenyPolicy() + } + oldCount := len(base.Egress) + newEgress, removedRules := removeRulesByTarget(base.Egress, targets) + removed := oldCount - len(newEgress) + + if removed == 0 { + mode := modeFromPolicy(base) + writeJSON(w, http.StatusOK, policyStatusResponse{ + Status: "ok", + Mode: mode, + EnforcementMode: s.enforcementMode, + Reason: "no matching targets found", + }) + return + } + + rawMerged, err := json.Marshal(policy.NetworkPolicy{ + DefaultAction: base.DefaultAction, + Egress: newEgress, + }) + if err != nil { + logEgressUpdateFailedError(fmt.Sprintf("failed to marshal updated policy: %v", err)) + http.Error(w, fmt.Sprintf("internal error: %v", err), http.StatusInternalServerError) + return + } + newPolicy, err := policy.ParsePolicy(string(rawMerged)) + if err != nil { + logEgressUpdateFailedError(fmt.Sprintf("invalid policy after delete: %v", err)) + http.Error(w, fmt.Sprintf("internal error: %v", err), http.StatusInternalServerError) + return + } + + mode := modeFromPolicy(newPolicy) + log.Infof("policy API: deleting %d egress rule(s) by target, removed=%d, mode=%s, enforcement=%s", len(targets), removed, mode, s.enforcementMode) + if !s.commitPolicy(r.Context(), w, newPolicy, "delete") { + return + } + logEgressUpdated(newPolicy.DefaultAction, removedRules) + log.Infof("policy API: delete applied successfully") + writeJSON(w, http.StatusOK, policyStatusResponse{ + Status: "ok", + Mode: mode, + EnforcementMode: s.enforcementMode, + }) +} + // commitPolicy applies one logical change: 
optional disk persist → merge always file rules → nft // static (with nameserver allow-IPs) → then update in-memory user policy (POST/PATCH/GET view). func (s *policyServer) commitPolicy(ctx context.Context, w http.ResponseWriter, pol *policy.NetworkPolicy, op string) bool { diff --git a/components/egress/policy_server_test.go b/components/egress/policy_server_test.go index 74e33771e..a2a0aacbd 100644 --- a/components/egress/policy_server_test.go +++ b/components/egress/policy_server_test.go @@ -245,6 +245,150 @@ func TestHandlePatch_RejectsWhenOverMaxEgressRules(t *testing.T) { require.Len(t, proxy.updated.Egress, 2, "policy should be unchanged") } +func TestHandleDelete_RemovesMatchingTargets(t *testing.T) { + initial := &policy.NetworkPolicy{ + DefaultAction: policy.ActionDeny, + Egress: []policy.EgressRule{ + {Action: policy.ActionAllow, Target: "example.com"}, + {Action: policy.ActionDeny, Target: "blocked.com"}, + {Action: policy.ActionAllow, Target: "keep.com"}, + }, + } + proxy := &stubProxy{updated: initial} + nft := &stubNft{} + srv := &policyServer{proxy: proxy, nft: nft, enforcementMode: "dns+nft"} + + body := `["blocked.com","nonexistent.com"]` + req := httptest.NewRequest(http.MethodDelete, "/policy", strings.NewReader(body)) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusOK, resp.StatusCode, "expected 200 OK") + require.Equal(t, 1, nft.calls, "expected nft ApplyStatic called once") + require.NotNil(t, proxy.updated, "expected proxy policy updated") + require.Equal(t, policy.ActionDeny, proxy.updated.DefaultAction, "defaultAction should be preserved") + require.Len(t, proxy.updated.Egress, 2, "expected 2 rules remaining after delete") + require.Equal(t, policy.ActionAllow, proxy.updated.Egress[0].Action) + require.Equal(t, "example.com", proxy.updated.Egress[0].Target) + require.Equal(t, policy.ActionAllow, proxy.updated.Egress[1].Action) + require.Equal(t, "keep.com", 
proxy.updated.Egress[1].Target) +} + +func TestHandleDelete_CaseInsensitiveMatch(t *testing.T) { + initial := &policy.NetworkPolicy{ + DefaultAction: policy.ActionDeny, + Egress: []policy.EgressRule{ + {Action: policy.ActionAllow, Target: "Example.COM"}, + {Action: policy.ActionDeny, Target: "Blocked.COM"}, + }, + } + proxy := &stubProxy{updated: initial} + nft := &stubNft{} + srv := &policyServer{proxy: proxy, nft: nft, enforcementMode: "dns+nft"} + + body := `["example.com"]` + req := httptest.NewRequest(http.MethodDelete, "/policy", strings.NewReader(body)) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusOK, resp.StatusCode, "expected 200 OK") + require.NotNil(t, proxy.updated) + require.Len(t, proxy.updated.Egress, 1, "expected 1 rule remaining") + require.Equal(t, "Blocked.COM", proxy.updated.Egress[0].Target, "unmatched rule should remain") +} + +func TestHandleDelete_NoMatchReturns200(t *testing.T) { + initial := &policy.NetworkPolicy{ + DefaultAction: policy.ActionDeny, + Egress: []policy.EgressRule{ + {Action: policy.ActionAllow, Target: "keep.com"}, + }, + } + proxy := &stubProxy{updated: initial} + nft := &stubNft{} + srv := &policyServer{proxy: proxy, nft: nft, enforcementMode: "dns+nft"} + + body := `["nonexistent.com"]` + req := httptest.NewRequest(http.MethodDelete, "/policy", strings.NewReader(body)) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusOK, resp.StatusCode, "expected 200 OK even when no targets match") + require.Equal(t, 0, nft.calls, "nft should not be called when nothing changes") + require.Len(t, proxy.updated.Egress, 1, "policy should be unchanged") +} + +func TestHandleDelete_EmptyBodyReturns400(t *testing.T) { + proxy := &stubProxy{updated: policy.DefaultDenyPolicy()} + srv := &policyServer{proxy: proxy, nft: nil, enforcementMode: "dns"} + + req := httptest.NewRequest(http.MethodDelete, "/policy", 
strings.NewReader("")) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusBadRequest, resp.StatusCode, "expected 400 for empty body") +} + +func TestHandleDelete_EmptyArrayReturns400(t *testing.T) { + proxy := &stubProxy{updated: policy.DefaultDenyPolicy()} + srv := &policyServer{proxy: proxy, nft: nil, enforcementMode: "dns"} + + body := `[]` + req := httptest.NewRequest(http.MethodDelete, "/policy", strings.NewReader(body)) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusBadRequest, resp.StatusCode, "expected 400 for empty array") +} + +func TestHandleDelete_InvalidJSONReturns400(t *testing.T) { + proxy := &stubProxy{updated: policy.DefaultDenyPolicy()} + srv := &policyServer{proxy: proxy, nft: nil, enforcementMode: "dns"} + + body := `not-json` + req := httptest.NewRequest(http.MethodDelete, "/policy", strings.NewReader(body)) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusBadRequest, resp.StatusCode, "expected 400 for invalid JSON") +} + +func TestHandleDelete_NftFailureReturns500(t *testing.T) { + initial := &policy.NetworkPolicy{ + DefaultAction: policy.ActionDeny, + Egress: []policy.EgressRule{ + {Action: policy.ActionAllow, Target: "example.com"}, + }, + } + proxy := &stubProxy{updated: initial} + nft := &stubNft{err: errors.New("nft apply failed")} + srv := &policyServer{proxy: proxy, nft: nft, enforcementMode: "dns+nft"} + + body := `["example.com"]` + req := httptest.NewRequest(http.MethodDelete, "/policy", strings.NewReader(body)) + w := httptest.NewRecorder() + + srv.handlePolicy(w, req) + + resp := w.Result() + require.Equal(t, http.StatusInternalServerError, resp.StatusCode, "expected 500 on nft failure") + require.Equal(t, 1, nft.calls, "expected nft ApplyStatic called once") + require.Len(t, proxy.updated.Egress, 1, "proxy should not be updated on nft failure") 
+ require.Equal(t, "example.com", proxy.updated.Egress[0].Target, "original rule should remain") +} + func TestHandlePost_RejectsWhenOverMaxEgressRules(t *testing.T) { proxy := &stubProxy{} nft := &stubNft{} diff --git a/components/egress/policy_utils.go b/components/egress/policy_utils.go index 10c0a6cad..b3aa42176 100644 --- a/components/egress/policy_utils.go +++ b/components/egress/policy_utils.go @@ -83,6 +83,32 @@ func mergeEgressRules(base, additions []policy.EgressRule) []policy.EgressRule { return out } +// removeRulesByTarget returns a new slice with rules matching targets removed, +// plus the removed rules. Domain targets are matched case-insensitively. +// Targets not found are silently ignored. +func removeRulesByTarget(rules []policy.EgressRule, targets []string) (kept, removed []policy.EgressRule) { + if len(targets) == 0 || len(rules) == 0 { + return rules, nil + } + removeSet := make(map[string]struct{}, len(targets)) + for _, t := range targets { + key := strings.ToLower(strings.TrimSpace(t)) + if key == "" { + continue + } + removeSet[key] = struct{}{} + } + kept = make([]policy.EgressRule, 0, len(rules)) + for _, r := range rules { + if _, ok := removeSet[strings.ToLower(r.Target)]; ok { + removed = append(removed, r) + } else { + kept = append(kept, r) + } + } + return kept, removed +} + // mergeKey: domain targets lowercased for dedupe; IP/CIDR left as-is. 
func mergeKey(r policy.EgressRule) string { if r.Target == "" { diff --git a/components/egress/tests/smoke-nft.sh b/components/egress/tests/smoke-nft.sh index eac5c232e..ff704f7dd 100755 --- a/components/egress/tests/smoke-nft.sh +++ b/components/egress/tests/smoke-nft.sh @@ -155,6 +155,47 @@ else pass "www.mozilla.org blocked after patch" fi +info "DELETE: deny two hosts, then delete one rule" +curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \ + -d '{"defaultAction":"allow","egress":[{"action":"deny","target":"api.github.com"},{"action":"deny","target":"www.cloudflare.com"}]}' + +info "Test: both hosts should be blocked before delete" +if run_in_app -I https://api.github.com --max-time 8 >/dev/null 2>&1; then + fail "api.github.com should be blocked before delete" +fi +if run_in_app -I https://www.cloudflare.com --max-time 8 >/dev/null 2>&1; then + fail "www.cloudflare.com should be blocked before delete" +fi +pass "both hosts blocked before delete" + +info "Deleting api.github.com rule" +curl -sSf -XDELETE "http://127.0.0.1:${POLICY_PORT}/policy" \ + -d '["api.github.com"]' + +info "Test: api.github.com allowed, www.cloudflare.com still blocked after delete" +run_in_app -I https://api.github.com --max-time 20 >/dev/null 2>&1 || fail "api.github.com should be allowed after delete" +pass "api.github.com allowed after delete" +if run_in_app -I https://www.cloudflare.com --max-time 8 >/dev/null 2>&1; then + fail "www.cloudflare.com should remain blocked after delete" +fi +pass "www.cloudflare.com still blocked" + +info "Deleting non-existent target (idempotent)" +resp="$(curl -sSf -XDELETE "http://127.0.0.1:${POLICY_PORT}/policy" -d '["nonexistent.com"]')" +if echo "${resp}" | grep -q '"no matching targets found"'; then + pass "idempotent delete returns no matching targets found" +else + fail "expected no matching targets found, got: ${resp}" +fi + +info "Deleting with empty body (expect 400)" +http_code="$(curl -s -o /dev/null -w '%{http_code}' -XDELETE 
"http://127.0.0.1:${POLICY_PORT}/policy" -d '')" +if [ "${http_code}" = "400" ]; then + pass "empty body returns 400" +else + fail "empty body should return 400, got ${http_code}" +fi + info "Always-rule dynamic check (single transition)" curl -sSf -XPOST "http://127.0.0.1:${POLICY_PORT}/policy" \ -d '{"defaultAction":"deny","egress":[{"action":"allow","target":"api.github.com"}]}' diff --git a/components/execd/bootstrap.sh b/components/execd/bootstrap.sh index 9a6ec23c9..328e9b755 100755 --- a/components/execd/bootstrap.sh +++ b/components/execd/bootstrap.sh @@ -42,27 +42,54 @@ _sudo() { fi } -# Install mitm egress CA into the system trust store (no extra env vars). -# - Debian/Ubuntu/Alpine: update-ca-certificates + /usr/local/share/ca-certificates/ -# - RHEL/CentOS/Fedora/Alma/Rocky: update-ca-trust + /etc/pki/ca-trust/source/anchors/ +# Install mitm CA into the system trust store (for non-Python programs) +# and set OPENSANDBOX_MERGED_CA to a PEM bundle containing a full root +# set + mitm CA (for env vars like REQUESTS_CA_BUNDLE that *replace* +# rather than append to the default roots). +OPENSANDBOX_MERGED_CA="" trust_mitm_ca() { cert="$1" + merged="/opt/opensandbox/merged-ca-certificates.pem" + + # 1) Try to install into the system trust store (best-effort). 
if command -v update-ca-certificates >/dev/null 2>&1; then - _sudo mkdir -p /usr/local/share/ca-certificates - _sudo cp "$cert" /usr/local/share/ca-certificates/opensandbox-mitmproxy-ca.crt - _sudo update-ca-certificates - return 0 + _sudo mkdir -p /usr/local/share/ca-certificates \ + && _sudo cp "$cert" /usr/local/share/ca-certificates/opensandbox-mitmproxy-ca.crt \ + && _sudo update-ca-certificates \ + || echo "warning: update-ca-certificates failed; system trust store may not include mitm CA" >&2 + elif command -v update-ca-trust >/dev/null 2>&1; then + _sudo mkdir -p /etc/pki/ca-trust/source/anchors \ + && _sudo cp "$cert" /etc/pki/ca-trust/source/anchors/opensandbox-mitmproxy-ca.pem \ + && { _sudo update-ca-trust extract || _sudo update-ca-trust; } \ + || echo "warning: update-ca-trust failed; system trust store may not include mitm CA" >&2 + else + echo "warning: no system trust-store tooling found (need update-ca-certificates or update-ca-trust)" >&2 fi - if command -v update-ca-trust >/dev/null 2>&1; then - _sudo mkdir -p /etc/pki/ca-trust/source/anchors - _sudo cp "$cert" /etc/pki/ca-trust/source/anchors/opensandbox-mitmproxy-ca.pem - if ! _sudo update-ca-trust extract; then - _sudo update-ca-trust - fi - return 0 + + # 2) Build a merged bundle (complete root set + mitm CA). + # Prefer certifi (full Mozilla root set) over system bundles which + # may be incomplete in minimal Docker images. 
+ certifi_ca="" + if command -v python3 >/dev/null 2>&1; then + certifi_ca="$(python3 -c 'import certifi; print(certifi.where())' 2>/dev/null)" || certifi_ca="" + elif command -v python >/dev/null 2>&1; then + certifi_ca="$(python -c 'import certifi; print(certifi.where())' 2>/dev/null)" || certifi_ca="" fi - echo "warning: cannot install mitm CA (need update-ca-certificates or update-ca-trust)" >&2 + for candidate in \ + "$certifi_ca" \ + /etc/ssl/certs/ca-certificates.crt \ + /etc/pki/tls/certs/ca-bundle.crt \ + /etc/ssl/cert.pem \ + /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem; do + if [ -n "$candidate" ] && [ -f "$candidate" ] && [ -s "$candidate" ]; then + cat "$candidate" "$cert" > "$merged" + OPENSANDBOX_MERGED_CA="$merged" + return 0 + fi + done + + echo "warning: could not locate any CA bundle to merge with mitm CA" >&2 return 0 } @@ -102,7 +129,7 @@ trust_mitm_ca_nss() { MITM_CA="/opt/opensandbox/mitmproxy-ca-cert.pem" if is_truthy "${OPENSANDBOX_EGRESS_MITMPROXY_TRANSPARENT:-}"; then i=0 - while [ "$i" -lt 30 ]; do + while [ "$i" -lt 300 ]; do if [ -f "$MITM_CA" ] && [ -s "$MITM_CA" ]; then break fi @@ -110,15 +137,28 @@ if is_truthy "${OPENSANDBOX_EGRESS_MITMPROXY_TRANSPARENT:-}"; then i=$((i + 1)) done if [ ! -f "$MITM_CA" ] || [ ! -s "$MITM_CA" ]; then - echo "warning: timed out after 30s waiting for $MITM_CA (egress mitm CA export); continuing without system CA trust" >&2 - elif ! trust_mitm_ca "$MITM_CA"; then - echo "warning: failed to install mitm CA into system trust store; TLS interception may not work for system libraries" >&2 + echo "warning: timed out after 300s waiting for $MITM_CA (egress mitm CA export); continuing without system CA trust" >&2 + else + echo "mitm CA ready at $MITM_CA after ${i}s" + if ! 
trust_mitm_ca "$MITM_CA"; then + echo "warning: failed to install mitm CA into system trust store; TLS interception may not work for system libraries" >&2 + fi fi if [ -f "$MITM_CA" ] && [ -s "$MITM_CA" ]; then trust_mitm_ca_nss "$MITM_CA" || true - export NODE_EXTRA_CA_CERTS="$MITM_CA" - export REQUESTS_CA_BUNDLE="$MITM_CA" + export NODE_EXTRA_CA_CERTS="$MITM_CA" # additive — Node appends to built-in roots + + # REQUESTS_CA_BUNDLE and SSL_CERT_FILE replace the default bundle, + # so use merged roots (certifi/system CA + mitm CA). + if [ -n "$OPENSANDBOX_MERGED_CA" ] && [ -f "$OPENSANDBOX_MERGED_CA" ]; then + export REQUESTS_CA_BUNDLE="$OPENSANDBOX_MERGED_CA" + export SSL_CERT_FILE="$OPENSANDBOX_MERGED_CA" + else + echo "warning: merged CA bundle not available; REQUESTS_CA_BUNDLE/SSL_CERT_FILE will only contain the mitm CA" >&2 + export REQUESTS_CA_BUNDLE="$MITM_CA" + export SSL_CERT_FILE="$MITM_CA" + fi fi fi @@ -135,6 +175,27 @@ if ! touch "$EXECD_ENVS" 2>/dev/null; then fi export EXECD_ENVS +# Run a user-defined pre-script before launching execd. The script is sourced +# with POSIX `.` (not executed as a child process) so any variables it +# `export`s propagate to execd and the chained command below — a subprocess +# would lose those exports the moment it exits. +if [ -n "${EXECD_BOOTSTRAP_PRE_SCRIPT:-}" ]; then + if [ -f "$EXECD_BOOTSTRAP_PRE_SCRIPT" ] && [ -r "$EXECD_BOOTSTRAP_PRE_SCRIPT" ]; then + # Force `.` to read the literal path; without a slash it would fall + # back to a PATH search and could load the wrong file. + case "$EXECD_BOOTSTRAP_PRE_SCRIPT" in + */*) _pre_script="$EXECD_BOOTSTRAP_PRE_SCRIPT" ;; + *) _pre_script="./$EXECD_BOOTSTRAP_PRE_SCRIPT" ;; + esac + echo "sourcing pre-script $EXECD_BOOTSTRAP_PRE_SCRIPT" + # shellcheck disable=SC1090 + . 
"$_pre_script" + unset _pre_script + else + echo "warning: EXECD_BOOTSTRAP_PRE_SCRIPT=$EXECD_BOOTSTRAP_PRE_SCRIPT not found or not readable" >&2 + fi +fi + echo "starting OpenSandbox Execd daemon at $EXECD." $EXECD & diff --git a/components/execd/pkg/runtime/command.go b/components/execd/pkg/runtime/command.go index 893956366..fcabe1414 100644 --- a/components/execd/pkg/runtime/command.go +++ b/components/execd/pkg/runtime/command.go @@ -170,7 +170,32 @@ func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest safego.Go(func() { for { select { + case <-done: + // cmd.Wait() has returned (or start failed). The pid is + // about to be — or already has been — reaped, so we + // must not signal it. Execute()'s defer cancel() fires + // after every foreground command, including successful + // ones, so without this gate the SIGKILL below would + // run on a recycled pid/pgid and could kill an + // unrelated process group. + return case <-ctx.Done(): + // Re-check `done` to avoid a race with cmd.Wait() + // returning concurrently. If cmd.Wait() has just + // finished, the leader pid may be reaped and recycled + // at any moment; signaling -pid would then target a + // foreign process group. + select { + case <-done: + return + default: + } + // Genuine cancellation (timeout, client disconnect, + // Interrupt). Kill the whole process group so children + // don't outlive the cancelled context. 
+ if cmd.Process != nil { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + } return case sig := <-signals: if sig == nil { diff --git a/components/execd/pkg/runtime/command_common.go b/components/execd/pkg/runtime/command_common.go index 03205e20c..804869b61 100644 --- a/components/execd/pkg/runtime/command_common.go +++ b/components/execd/pkg/runtime/command_common.go @@ -32,13 +32,14 @@ func (c *Controller) tailStdPipe(file string, onExecute func(text string), done defer ticker.Stop() mutex := &sync.Mutex{} + var lastWasCR bool for { select { case <-done: - c.readFromPos(mutex, file, lastPos, onExecute, true) + c.readFromPos(mutex, file, lastPos, onExecute, true, &lastWasCR) return case <-ticker.C: - newPos := c.readFromPos(mutex, file, lastPos, onExecute, false) + newPos := c.readFromPos(mutex, file, lastPos, onExecute, false, &lastWasCR) lastPos = newPos } } @@ -104,7 +105,9 @@ func (c *Controller) combinedOutputFileName(session string) string { } // readFromPos streams new content from a file starting at startPos. -func (c *Controller) readFromPos(mutex *sync.Mutex, filepath string, startPos int64, onExecute func(string), flushIncomplete bool) int64 { +// lastWasCR persists CRLF detection across calls so a \r\n pair split between +// two polls does not surface a spurious blank line for the trailing \n. 
+func (c *Controller) readFromPos(mutex *sync.Mutex, filepath string, startPos int64, onExecute func(string), flushIncomplete bool, lastWasCR *bool) int64 { if !mutex.TryLock() { return -1 } @@ -121,6 +124,15 @@ func (c *Controller) readFromPos(mutex *sync.Mutex, filepath string, startPos in reader := bufio.NewReader(file) var buffer bytes.Buffer var currentPos int64 = startPos + cr := false + if lastWasCR != nil { + cr = *lastWasCR + } + defer func() { + if lastWasCR != nil { + *lastWasCR = cr + } + }() for { b, err := reader.ReadByte() @@ -138,15 +150,22 @@ func (c *Controller) readFromPos(mutex *sync.Mutex, filepath string, startPos in // Check if it's a line terminator (\n or \r) if b == '\n' || b == '\r' { - // If buffer has content, output this line - if buffer.Len() > 0 { + switch { + case buffer.Len() > 0: + // Flush the line content without the terminator onExecute(buffer.String()) buffer.Reset() + case b == '\n' && cr: + // Second half of a \r\n pair; already emitted on \r + default: + // Standalone blank line; surface it so callers see the gap + onExecute("\n") } - // Skip line terminator + cr = (b == '\r') continue } + cr = false buffer.WriteByte(b) } diff --git a/components/execd/pkg/runtime/command_signal_test.go b/components/execd/pkg/runtime/command_signal_test.go new file mode 100644 index 000000000..bd23b52d3 --- /dev/null +++ b/components/execd/pkg/runtime/command_signal_test.go @@ -0,0 +1,246 @@ +// Copyright 2025 Alibaba Group Holding Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !windows +// +build !windows + +package runtime + +import ( + "context" + "errors" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "syscall" + "testing" + "time" + + "github.com/alibaba/opensandbox/execd/pkg/jupyter/execute" + "github.com/stretchr/testify/require" +) + +// TestRunCommand_CancelKillsChildren verifies that cancelling the context +// terminates not only the bash group leader but also its descendant +// processes. Regression test for +// https://github.com/alibaba/OpenSandbox/issues/922. +func TestRunCommand_CancelKillsChildren(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + pidFile := filepath.Join(t.TempDir(), "child.pid") + + c := NewController("", "") + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + started := make(chan struct{}) + var once sync.Once + + req := &ExecuteCodeRequest{ + // Spawn a sleep child, record its pid, then wait so the bash + // leader stays alive until the context is cancelled. + Code: `sleep 30 & echo $! 
> "` + pidFile + `"; echo READY; wait`, + Cwd: t.TempDir(), + Timeout: 30 * time.Second, + Hooks: ExecuteResultHook{ + OnExecuteInit: func(_ string) {}, + OnExecuteStdout: func(s string) { + if strings.TrimSpace(s) == "READY" { + once.Do(func() { close(started) }) + } + }, + OnExecuteStderr: func(_ string) {}, + OnExecuteError: func(_ *execute.ErrorOutput) {}, + OnExecuteComplete: func(_ time.Duration) {}, + }, + } + + done := make(chan struct{}) + go func() { + _ = c.runCommand(ctx, req) + close(done) + }() + + select { + case <-started: + case <-time.After(10 * time.Second): + cancel() + <-done + t.Fatal("command did not emit READY in time") + } + + pidBytes, err := os.ReadFile(pidFile) + require.NoError(t, err, "expected child pid file") + childPid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) + require.NoError(t, err) + require.Positive(t, childPid) + + require.NoError(t, syscall.Kill(childPid, 0), "child should be alive before cancel") + + cancel() + + select { + case <-done: + case <-time.After(5 * time.Second): + t.Fatal("runCommand did not return after cancel") + } + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if err := syscall.Kill(childPid, 0); err != nil { + require.True(t, errors.Is(err, syscall.ESRCH), + "unexpected liveness probe error: %v", err) + return + } + time.Sleep(50 * time.Millisecond) + } + t.Fatalf("child pid %d still alive 2s after cancel — process leak", childPid) +} + +// TestInterrupt_AfterFinished_ReturnsError verifies that an Interrupt +// arriving after the command has completed does not signal a recycled PID. +// Without this guard, group-wide kill would amplify the stale-PID hazard +// to every process in an unrelated process group. 
+func TestInterrupt_AfterFinished_ReturnsError(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + c := NewController("", "") + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + var session string + completeCh := make(chan struct{}, 1) + req := &ExecuteCodeRequest{ + Code: `echo done`, + Cwd: t.TempDir(), + Timeout: 5 * time.Second, + Hooks: ExecuteResultHook{ + OnExecuteInit: func(s string) { session = s }, + OnExecuteStdout: func(_ string) {}, + OnExecuteStderr: func(_ string) {}, + OnExecuteError: func(_ *execute.ErrorOutput) {}, + OnExecuteComplete: func(_ time.Duration) { completeCh <- struct{}{} }, + }, + } + require.NoError(t, c.runCommand(ctx, req)) + + select { + case <-completeCh: + case <-time.After(3 * time.Second): + t.Fatal("command did not complete in time") + } + require.NotEmpty(t, session) + + err := c.Interrupt(session) + require.Error(t, err, "Interrupt on finished session must error") + require.Contains(t, err.Error(), "not running") + + snap := c.commandSnapshot(session) + require.NotNil(t, snap) + require.False(t, snap.running, "running flag should be cleared") + require.Equal(t, 0, snap.pid, "pid should be cleared to avoid stale-PID kill") +} + +// TestKillPid_ZombieLeaderDoesNotFail verifies that killPid does not +// return an error when a group leader becomes a zombie before its parent +// has reaped it. kill(-pid, 0) keeps reporting the group as observable +// while the zombie lingers, but SIGKILL has already been delivered and +// the kernel will tear the group down once Wait() runs. Treating that +// state as a failure caused Interrupt to surface a 500 even though the +// kill succeeded. 
+func TestKillPid_ZombieLeaderDoesNotFail(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + cmd := exec.Command("bash", "-c", `sleep 30 & wait`) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + require.NoError(t, cmd.Start()) + // Deliberately omit a reaper goroutine so the leader stays as a + // zombie after kill — that is the condition we want to exercise. + t.Cleanup(func() { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + _, _ = cmd.Process.Wait() + }) + + // Give bash a moment to spawn the sleep child so the group has more + // than just the leader. + time.Sleep(100 * time.Millisecond) + + c := &Controller{} + require.NoError(t, c.killPid(cmd.Process.Pid), + "slow post-SIGKILL teardown must not be reported as a hard failure") +} + +// TestKillPid_TerminatesEntireProcessGroup verifies that killPid signals +// the whole process group, not just the leader. Regression test for +// https://github.com/alibaba/OpenSandbox/issues/922. +func TestKillPid_TerminatesEntireProcessGroup(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skip("bash not found in PATH") + } + + pidFile := filepath.Join(t.TempDir(), "child.pid") + cmd := exec.Command("bash", "-c", + `sleep 30 & echo $! > "`+pidFile+`"; wait`) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + require.NoError(t, cmd.Start()) + // Reap the leader concurrently so it doesn't linger as a zombie that + // keeps the process group "alive" from killPid's liveness probe + // perspective. Mirrors how runCommand's cmd.Wait() reaps in production. 
+ waitDone := make(chan struct{}) + go func() { + _, _ = cmd.Process.Wait() + close(waitDone) + }() + t.Cleanup(func() { + _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) + <-waitDone + }) + + var childPid int + deadline := time.Now().Add(3 * time.Second) + for time.Now().Before(deadline) { + if data, err := os.ReadFile(pidFile); err == nil { + if pid, perr := strconv.Atoi(strings.TrimSpace(string(data))); perr == nil && pid > 0 { + childPid = pid + break + } + } + time.Sleep(50 * time.Millisecond) + } + require.Positive(t, childPid, "failed to capture child pid") + require.NoError(t, syscall.Kill(childPid, 0), "child should be alive before kill") + + c := &Controller{} + require.NoError(t, c.killPid(cmd.Process.Pid)) + + deadline = time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if err := syscall.Kill(childPid, 0); err != nil { + require.True(t, errors.Is(err, syscall.ESRCH), + "unexpected liveness probe error: %v", err) + return + } + time.Sleep(50 * time.Millisecond) + } + t.Fatalf("child pid %d still alive 2s after killPid — process leak", childPid) +} diff --git a/components/execd/pkg/runtime/command_status.go b/components/execd/pkg/runtime/command_status.go index 6dbc6d4f2..c0883d0fc 100644 --- a/components/execd/pkg/runtime/command_status.go +++ b/components/execd/pkg/runtime/command_status.go @@ -40,6 +40,9 @@ type CommandOutput struct { } func (c *Controller) commandSnapshot(session string) *commandKernel { + c.mu.RLock() + defer c.mu.RUnlock() + var kernel *commandKernel if v, ok := c.commandClientMap.Load(session); ok { kernel, _ = v.(*commandKernel) @@ -128,4 +131,8 @@ func (c *Controller) markCommandFinished(session string, exitCode int, errMsg st kernel.errMsg = errMsg kernel.running = false kernel.finishedAt = &now + // Clear the PID so a late or retried Interrupt cannot signal a recycled + // process. Group-wide kill would otherwise amplify the impact of a + // stale-PID hit to every process in the unrelated process group. 
+ kernel.pid = 0 } diff --git a/components/execd/pkg/runtime/command_test.go b/components/execd/pkg/runtime/command_test.go index a6207fc3b..3e4ba1da8 100644 --- a/components/execd/pkg/runtime/command_test.go +++ b/components/execd/pkg/runtime/command_test.go @@ -42,7 +42,7 @@ func TestReadFromPos_SplitsOnCRAndLF(t *testing.T) { var got []string c := &Controller{} - nextPos := c.readFromPos(mutex, logFile, 0, func(s string) { got = append(got, s) }, false) + nextPos := c.readFromPos(mutex, logFile, 0, func(s string) { got = append(got, s) }, false, nil) want := []string{"line1", "prog 10%", "prog 20%", "prog 30%", "last"} require.Len(t, got, len(want)) @@ -59,7 +59,7 @@ func TestReadFromPos_SplitsOnCRAndLF(t *testing.T) { _ = f.Close() got = got[:0] - c.readFromPos(mutex, logFile, nextPos, func(s string) { got = append(got, s) }, false) + c.readFromPos(mutex, logFile, nextPos, func(s string) { got = append(got, s) }, false, nil) want = []string{"tail1", "tail2"} require.Len(t, got, len(want)) for i := range want { @@ -77,7 +77,7 @@ func TestReadFromPos_LongLine(t *testing.T) { var got []string c := &Controller{} - c.readFromPos(&sync.Mutex{}, logFile, 0, func(s string) { got = append(got, s) }, false) + c.readFromPos(&sync.Mutex{}, logFile, 0, func(s string) { got = append(got, s) }, false, nil) require.Len(t, got, 1, "expected one token") require.Equal(t, strings.TrimSuffix(longLine, "\n"), got[0], "long line mismatch") @@ -98,15 +98,86 @@ func TestReadFromPos_FlushesTrailingLine(t *testing.T) { } // First read: should only get complete lines with newlines - pos := c.readFromPos(mutex, file, 0, onExecute, false) + pos := c.readFromPos(mutex, file, 0, onExecute, false, nil) assert.GreaterOrEqual(t, pos, int64(0)) assert.Equal(t, []string{"line1"}, lines) // Flush at end: should output the last line (without newline) - c.readFromPos(mutex, file, pos, onExecute, true) + c.readFromPos(mutex, file, pos, onExecute, true, nil) assert.Equal(t, []string{"line1", 
"lastline-without-newline"}, lines) } +func TestReadFromPos_PreservesBlankLines(t *testing.T) { + tmp := t.TempDir() + logFile := filepath.Join(tmp, "stdout.log") + + // Mix of single newlines, consecutive blank lines, leading blank, and CRLF. + initial := "a\n\nb\n\n\nc\n\r\nd\n" + require.NoError(t, os.WriteFile(logFile, []byte(initial), 0o644)) + + var got []string + c := &Controller{} + c.readFromPos(&sync.Mutex{}, logFile, 0, func(s string) { got = append(got, s) }, false, nil) + + want := []string{"a", "\n", "b", "\n", "\n", "c", "\n", "d"} + require.Equal(t, want, got) +} + +// TestReadFromPos_CRLFAcrossPolls ensures a \r\n pair that arrives in two +// successive polls does not emit a spurious blank line for the trailing \n. +// Reproduces the regression on Windows/cmd writers that flush \r before \n. +func TestReadFromPos_CRLFAcrossPolls(t *testing.T) { + tmp := t.TempDir() + logFile := filepath.Join(tmp, "stdout.log") + + require.NoError(t, os.WriteFile(logFile, []byte("a\r"), 0o644)) + + var got []string + c := &Controller{} + mutex := &sync.Mutex{} + var lastWasCR bool + pos := c.readFromPos(mutex, logFile, 0, func(s string) { got = append(got, s) }, false, &lastWasCR) + require.Equal(t, []string{"a"}, got) + require.True(t, lastWasCR, "CR state must persist for next poll") + + f, err := os.OpenFile(logFile, os.O_APPEND|os.O_WRONLY, 0o644) + require.NoError(t, err) + _, err = f.WriteString("\nb\n") + require.NoError(t, err) + _ = f.Close() + + got = got[:0] + c.readFromPos(mutex, logFile, pos, func(s string) { got = append(got, s) }, false, &lastWasCR) + require.Equal(t, []string{"b"}, got, "trailing \\n of split CRLF must not emit a blank line") +} + +// TestReadFromPos_BlankCRLFAcrossPolls ensures a blank \r\n line split across +// polls is emitted as a single blank, not duplicated. 
+func TestReadFromPos_BlankCRLFAcrossPolls(t *testing.T) { + tmp := t.TempDir() + logFile := filepath.Join(tmp, "stdout.log") + + require.NoError(t, os.WriteFile(logFile, []byte("\r"), 0o644)) + + var got []string + c := &Controller{} + mutex := &sync.Mutex{} + var lastWasCR bool + pos := c.readFromPos(mutex, logFile, 0, func(s string) { got = append(got, s) }, false, &lastWasCR) + require.Equal(t, []string{"\n"}, got) + require.True(t, lastWasCR) + + f, err := os.OpenFile(logFile, os.O_APPEND|os.O_WRONLY, 0o644) + require.NoError(t, err) + _, err = f.WriteString("\n") + require.NoError(t, err) + _ = f.Close() + + got = got[:0] + c.readFromPos(mutex, logFile, pos, func(s string) { got = append(got, s) }, false, &lastWasCR) + require.Empty(t, got, "trailing \\n of split blank CRLF must not emit a second blank") +} + func TestRunCommand_Echo(t *testing.T) { if goruntime.GOOS == "windows" { t.Skip("bash not available on windows") diff --git a/components/execd/pkg/runtime/command_windows.go b/components/execd/pkg/runtime/command_windows.go index a3e418fb4..5c720dd4a 100644 --- a/components/execd/pkg/runtime/command_windows.go +++ b/components/execd/pkg/runtime/command_windows.go @@ -24,6 +24,7 @@ import ( "os" "os/exec" "strconv" + "sync" "time" "github.com/alibaba/opensandbox/execd/pkg/jupyter/execute" @@ -57,15 +58,21 @@ func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest cmd.Env = mergeEnvs(os.Environ(), extraEnv) done := make(chan struct{}, 1) + var wg sync.WaitGroup + wg.Add(2) safego.Go(func() { + defer wg.Done() c.tailStdPipe(c.stdoutFileName(session), request.Hooks.OnExecuteStdout, done) }) safego.Go(func() { + defer wg.Done() c.tailStdPipe(c.stderrFileName(session), request.Hooks.OnExecuteStderr, done) }) err = cmd.Start() if err != nil { + close(done) + wg.Wait() request.Hooks.OnExecuteError(&execute.ErrorOutput{EName: "CommandExecError", EValue: err.Error()}) log.Error("CommandExecError: error starting commands: %v", err) return 
nil @@ -80,6 +87,7 @@ func (c *Controller) runCommand(ctx context.Context, request *ExecuteCodeRequest err = cmd.Wait() close(done) + wg.Wait() if err != nil { var eName, eValue string var traceback []string diff --git a/components/execd/pkg/runtime/interrupt.go b/components/execd/pkg/runtime/interrupt.go index b9cd2a545..3419f1ae7 100644 --- a/components/execd/pkg/runtime/interrupt.go +++ b/components/execd/pkg/runtime/interrupt.go @@ -20,13 +20,9 @@ package runtime import ( "errors" "fmt" - "os" - "strings" "syscall" "time" - "github.com/alibaba/opensandbox/internal/safego" - "github.com/alibaba/opensandbox/execd/pkg/log" ) @@ -38,8 +34,16 @@ func (c *Controller) Interrupt(sessionID string) error { log.Warning("Interrupting Jupyter kernel %s", kernel.kernelID) return kernel.client.InterruptKernel(kernel.kernelID) case c.getCommandKernel(sessionID) != nil: - kernel := c.getCommandKernel(sessionID) - return c.killPid(kernel.pid) + // Snapshot under c.mu so running/pid are observed consistently with + // markCommandFinished. killPid signals the entire process group, so + // guarding against a stale PID is critical: a late Interrupt on a + // finished session must not blast SIGTERM/SIGKILL at an unrelated + // process group that has reused the PID. + snapshot := c.commandSnapshot(sessionID) + if snapshot == nil || !snapshot.running || snapshot.pid <= 0 { + return fmt.Errorf("command session %s is not running", sessionID) + } + return c.killPid(snapshot.pid) case c.getBashSession(sessionID) != nil: return c.closeBashSession(sessionID) default: @@ -48,53 +52,71 @@ func (c *Controller) Interrupt(sessionID string) error { } // killPid sends SIGTERM followed by SIGKILL if needed. +// +// Commands are launched with Setpgid: true, so pid is also the process group +// id. We signal the entire group via syscall.Kill(-pid, sig) so child and +// grandchild processes are terminated, not just the group leader. 
+// +// kill(2) on a process group only guarantees delivery to at least one +// member, and kill(-pid, 0) keeps reporting the group as observable while +// any unreaped zombie lingers. The probe loops below are therefore +// best-effort logging — once a kill signal has been delivered, a slow or +// asynchronous teardown is not treated as a hard failure that would +// surface as a 500 from Interrupt. func (c *Controller) killPid(pid int) error { - process, err := os.FindProcess(pid) - if err != nil { - return err + if pid <= 0 { + return fmt.Errorf("invalid pid %d", pid) } - log.Warning("Attempting to terminate process %d", pid) + log.Warning("Attempting to terminate process group %d", pid) - if err := process.Signal(syscall.SIGTERM); err != nil { - if strings.Contains(err.Error(), "already finished") { + sigtermDelivered := false + if err := syscall.Kill(-pid, syscall.SIGTERM); err != nil { + if errors.Is(err, syscall.ESRCH) { return nil } - log.Warning("SIGTERM failed for pid %d: %v, trying SIGKILL", pid, err) + log.Warning("SIGTERM failed for pgroup %d: %v, trying SIGKILL", pid, err) } else { - done := make(chan error, 1) - safego.Go(func() { - _, err := process.Wait() - done <- err - }) - - select { - case err := <-done: - if err == nil { - log.Info("Process %d terminated gracefully", pid) - return nil + sigtermDelivered = true + // Probe the group for liveness. os.Process.Wait() doesn't apply + // because the leader is not a child of this goroutine. 
+ deadline := time.Now().Add(3 * time.Second) + for time.Now().Before(deadline) { + if err := syscall.Kill(-pid, 0); err != nil { + if errors.Is(err, syscall.ESRCH) { + log.Info("Process group %d terminated gracefully", pid) + return nil + } } - case <-time.After(3 * time.Second): - log.Warning("Process %d did not terminate after SIGTERM, using SIGKILL", pid) + time.Sleep(50 * time.Millisecond) } + log.Warning("Process group %d did not exit after SIGTERM, escalating to SIGKILL", pid) } - if err := process.Signal(syscall.SIGKILL); err != nil { - if strings.Contains(err.Error(), "already finished") { + if err := syscall.Kill(-pid, syscall.SIGKILL); err != nil { + if errors.Is(err, syscall.ESRCH) { + return nil + } + if sigtermDelivered { + // SIGTERM was already delivered to at least one member, so the + // kill is in flight. SIGKILL failure here is commonly EPERM on + // a group reduced to zombies — the kernel will reap them once + // the parent runs Wait(). Surface as a warning rather than a + // hard error. 
+ log.Warning("SIGKILL on pgroup %d failed: %v; teardown likely already in progress", pid, err) return nil } - return fmt.Errorf("failed to kill process %d: %w", pid, err) + return fmt.Errorf("failed to kill process group %d: %w", pid, err) } for range 3 { - if err := process.Signal(syscall.Signal(0)); err != nil { - if strings.Contains(err.Error(), "already finished") || - strings.Contains(err.Error(), "no such process") { - log.Info("Process %d confirmed terminated", pid) + if err := syscall.Kill(-pid, 0); err != nil { + if errors.Is(err, syscall.ESRCH) { + log.Info("Process group %d confirmed terminated", pid) return nil } } time.Sleep(50 * time.Millisecond) } - - return fmt.Errorf("process %d might still be running", pid) + log.Warning("Process group %d still observable after SIGKILL; teardown may complete asynchronously", pid) + return nil } diff --git a/components/execd/pkg/runtime/interrupt_windows.go b/components/execd/pkg/runtime/interrupt_windows.go index 6e1044d77..bbcd3ccdb 100644 --- a/components/execd/pkg/runtime/interrupt_windows.go +++ b/components/execd/pkg/runtime/interrupt_windows.go @@ -35,8 +35,14 @@ func (c *Controller) Interrupt(sessionID string) error { log.Warning("Interrupting Jupyter kernel %s", kernel.kernelID) return kernel.client.InterruptKernel(kernel.kernelID) case c.getCommandKernel(sessionID) != nil: - kernel := c.getCommandKernel(sessionID) - return c.killPid(kernel.pid) + // Guard against a stale PID after the command has finished: the + // kernel is retained in commandClientMap, so a late Interrupt could + // otherwise terminate an unrelated process that reused the PID. 
+ snapshot := c.commandSnapshot(sessionID) + if snapshot == nil || !snapshot.running || snapshot.pid <= 0 { + return fmt.Errorf("command session %s is not running", sessionID) + } + return c.killPid(snapshot.pid) default: return errors.New("no such session") } diff --git a/components/execd/pkg/web/controller/basic.go b/components/execd/pkg/web/controller/basic.go index b33c660b0..2008fd63c 100644 --- a/components/execd/pkg/web/controller/basic.go +++ b/components/execd/pkg/web/controller/basic.go @@ -18,6 +18,7 @@ import ( "encoding/json" "net/http" "strconv" + "sync" "github.com/gin-gonic/gin" @@ -25,7 +26,8 @@ import ( ) type basicController struct { - ctx *gin.Context + ctx *gin.Context + sseSetupOnce sync.Once } func newBasicController(ctx *gin.Context) *basicController { diff --git a/components/execd/pkg/web/controller/codeinterpreting.go b/components/execd/pkg/web/controller/codeinterpreting.go index 93680e3f4..9b369d7d8 100644 --- a/components/execd/pkg/web/controller/codeinterpreting.go +++ b/components/execd/pkg/web/controller/codeinterpreting.go @@ -171,7 +171,9 @@ func (c *CodeInterpretingController) RunCode() { } runCodeRequest.Hooks = eventsHandler - c.setupSSEResponse() + // SSE headers are committed lazily on the first event write + // (see writeSingleEvent), so a synchronous error from Execute below can + // still be surfaced as a structured JSON error response. err = codeRunner.Execute(runCodeRequest) if err != nil { recordExecution("failure") @@ -400,7 +402,9 @@ func (c *CodeInterpretingController) RunInSession() { } runReq.Hooks = hooks - c.setupSSEResponse() + // SSE headers are committed lazily on the first event write + // (see writeSingleEvent), so a synchronous error from + // RunInBashSession can still be surfaced as a structured JSON error. 
err := codeRunner.RunInBashSession(ctx, runReq) if err != nil { recordExecution("failure") diff --git a/components/execd/pkg/web/controller/codeinterpreting_test.go b/components/execd/pkg/web/controller/codeinterpreting_test.go index 2beec41e5..af3c343b2 100644 --- a/components/execd/pkg/web/controller/codeinterpreting_test.go +++ b/components/execd/pkg/web/controller/codeinterpreting_test.go @@ -17,6 +17,7 @@ package controller import ( "context" "encoding/json" + "errors" "net/http" "testing" "time" @@ -333,3 +334,90 @@ func TestRunInSessionReturnsBeforeGracefulShutdownTimeoutAfterImmediateError(t * require.Equal(t, http.StatusOK, w.Code) require.Less(t, elapsed, flag.ApiGracefulShutdownTimeout/2) } + +// TestRunCodeSyncErrorEmitsJSONNotSSE guards against regression of the bug +// where Execute returning a synchronous error after setupSSEResponse caused +// the client to receive a text/event-stream response with a JSON body, which +// SDKs parsed as zero events ("empty sse stream"). Headers must stay +// uncommitted until the first event so RespondError can produce a proper +// application/json error response. 
+func TestRunCodeSyncErrorEmitsJSONNotSSE(t *testing.T) { + previousRunner := codeRunner + codeRunner = &fakeCodeRunner{ + execute: func(_ *runtime.ExecuteCodeRequest) error { + return errors.New("synchronous runtime failure") + }, + } + t.Cleanup(func() { codeRunner = previousRunner }) + + body := []byte(`{"code":"print(1)","context":{"id":"ctx-1","language":"python"}}`) + ctx, w := newTestContext(http.MethodPost, "/code/run", body) + ctrl := NewCodeInterpretingController(ctx) + + ctrl.RunCode() + + require.Equal(t, http.StatusInternalServerError, w.Code) + contentType := w.Header().Get("Content-Type") + require.Contains(t, contentType, "application/json", "should not commit text/event-stream when no event fires") + + var resp model.ErrorResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.Equal(t, model.ErrorCodeRuntimeError, resp.Code) + require.Contains(t, resp.Message, "synchronous runtime failure") +} + +// TestRunInSessionSyncErrorEmitsJSONNotSSE — see TestRunCodeSyncErrorEmitsJSONNotSSE. 
+func TestRunInSessionSyncErrorEmitsJSONNotSSE(t *testing.T) { + previousRunner := codeRunner + codeRunner = &fakeCodeRunner{ + runInBashSession: func(_ context.Context, _ *runtime.ExecuteCodeRequest) error { + return errors.New("synchronous session failure") + }, + } + t.Cleanup(func() { codeRunner = previousRunner }) + + body := []byte(`{"command":"echo hi","timeout":0}`) + ctx, w := newTestContext(http.MethodPost, "/sessions/session-1/run", body) + ctx.Params = append(ctx.Params, gin.Param{Key: "sessionId", Value: "session-1"}) + ctrl := NewCodeInterpretingController(ctx) + + ctrl.RunInSession() + + require.Equal(t, http.StatusInternalServerError, w.Code) + contentType := w.Header().Get("Content-Type") + require.Contains(t, contentType, "application/json", "should not commit text/event-stream when no event fires") + + var resp model.ErrorResponse + require.NoError(t, json.Unmarshal(w.Body.Bytes(), &resp)) + require.Equal(t, model.ErrorCodeRuntimeError, resp.Code) + require.Contains(t, resp.Message, "synchronous session failure") +} + +// TestRunCodeSuccessStillEmitsSSE confirms the lazy header path still produces +// a text/event-stream response when at least one event fires. 
+func TestRunCodeSuccessStillEmitsSSE(t *testing.T) { + previousRunner := codeRunner + previousTimeout := flag.ApiGracefulShutdownTimeout + codeRunner = &fakeCodeRunner{ + execute: func(request *runtime.ExecuteCodeRequest) error { + request.Hooks.OnExecuteInit("session-1") + request.Hooks.OnExecuteComplete(time.Millisecond) + return nil + }, + } + flag.ApiGracefulShutdownTimeout = 50 * time.Millisecond + t.Cleanup(func() { + codeRunner = previousRunner + flag.ApiGracefulShutdownTimeout = previousTimeout + }) + + body := []byte(`{"code":"print(1)","context":{"id":"ctx-1","language":"python"}}`) + ctx, w := newTestContext(http.MethodPost, "/code/run", body) + ctrl := NewCodeInterpretingController(ctx) + + ctrl.RunCode() + + require.Equal(t, http.StatusOK, w.Code) + require.Contains(t, w.Header().Get("Content-Type"), "text/event-stream") + require.NotEmpty(t, w.Body.Bytes(), "successful run should write SSE events") +} diff --git a/components/execd/pkg/web/controller/command.go b/components/execd/pkg/web/controller/command.go index f89e570f4..9c21ec6fe 100644 --- a/components/execd/pkg/web/controller/command.go +++ b/components/execd/pkg/web/controller/command.go @@ -80,7 +80,9 @@ func (c *CodeInterpretingController) RunCommand() { } runCodeRequest.Hooks = eventsHandler - c.setupSSEResponse() + // SSE headers are committed lazily on the first event write + // (see writeSingleEvent), so a synchronous error from Execute below can + // still be surfaced as a structured JSON error response. 
err = codeRunner.Execute(runCodeRequest) if err != nil { recordExecution("failure") diff --git a/components/execd/pkg/web/controller/filesystem_upload.go b/components/execd/pkg/web/controller/filesystem_upload.go index 459a9d2d7..eb5397f7c 100644 --- a/components/execd/pkg/web/controller/filesystem_upload.go +++ b/components/execd/pkg/web/controller/filesystem_upload.go @@ -18,173 +18,211 @@ import ( "encoding/json" "fmt" "io" + "mime/multipart" "net/http" "os" "path/filepath" + "time" "github.com/alibaba/opensandbox/execd/pkg/log" "github.com/alibaba/opensandbox/execd/pkg/util/pathutil" "github.com/alibaba/opensandbox/execd/pkg/web/model" ) +type uploadError struct { + status int + code model.ErrorCode + message string +} + +func newUploadError(status int, code model.ErrorCode, message string) *uploadError { + return &uploadError{status: status, code: code, message: message} +} + // UploadFile uploads files with metadata to specified paths func (c *FilesystemController) UploadFile() { rec := beginFilesystemMetric("upload") defer rec.Finish(c.basicController) + metadataParts, fileParts, uerr := c.parseUploadForm() + if uerr != nil { + c.RespondError(uerr.status, uerr.code, uerr.message) + return + } + + for i := range metadataParts { + if uerr := c.processUploadPair(metadataParts[i], fileParts[i]); uerr != nil { + c.RespondError(uerr.status, uerr.code, uerr.message) + return + } + } + + rec.MarkSuccess() + c.RespondSuccess(nil) +} + +func (c *FilesystemController) parseUploadForm() ([]*multipart.FileHeader, []*multipart.FileHeader, *uploadError) { form, err := c.ctx.MultipartForm() if err != nil || form == nil { - c.RespondError( - http.StatusBadRequest, - model.ErrorCodeInvalidFile, - "multipart form is empty", - ) - return + return nil, nil, newUploadError(http.StatusBadRequest, model.ErrorCodeInvalidFile, "multipart form is empty") } metadataParts := form.File["metadata"] fileParts := form.File["file"] if len(metadataParts) == 0 { - c.RespondError( - 
http.StatusBadRequest, - model.ErrorCodeInvalidFileMetadata, - "metadata file is missing", - ) - return + return nil, nil, newUploadError(http.StatusBadRequest, model.ErrorCodeInvalidFileMetadata, "metadata file is missing") } - if len(fileParts) == 0 { - c.RespondError( - http.StatusBadRequest, - model.ErrorCodeInvalidFileContent, - "file is missing", - ) - return + return nil, nil, newUploadError(http.StatusBadRequest, model.ErrorCodeInvalidFileContent, "file is missing") } - if len(metadataParts) != len(fileParts) { - c.RespondError( + return nil, nil, newUploadError( http.StatusBadRequest, model.ErrorCodeInvalidFile, fmt.Sprintf("metadata and file count mismatch: %d vs %d", len(metadataParts), len(fileParts)), ) - return } + return metadataParts, fileParts, nil +} - for i := range metadataParts { - metadataHeader := metadataParts[i] - metadataFile, err := metadataHeader.Open() - if err != nil { - c.RespondError( - http.StatusBadRequest, - model.ErrorCodeInvalidFileMetadata, - fmt.Sprintf("error opening metadata file. %v", err), - ) - return - } +func (c *FilesystemController) processUploadPair(metadataHeader, fileHeader *multipart.FileHeader) *uploadError { + meta, uerr := parseUploadMetadata(metadataHeader) + if uerr != nil { + return uerr + } - metaBytes, err := io.ReadAll(metadataFile) - metadataFile.Close() - if err != nil { - c.RespondError( - http.StatusBadRequest, - model.ErrorCodeInvalidFileMetadata, - fmt.Sprintf("error reading metadata content. %v", err), - ) - return - } + resolvedPath, uerr := resolveUploadTarget(meta.Path) + if uerr != nil { + return uerr + } - var meta model.FileMetadata - if err := json.Unmarshal(metaBytes, &meta); err != nil { - c.RespondError( - http.StatusBadRequest, - model.ErrorCodeInvalidFileMetadata, - fmt.Sprintf("invalid metadata format. 
%v", err), - ) - return - } + if uerr := writeUploadFile(resolvedPath, fileHeader); uerr != nil { + return uerr + } - targetPath := meta.Path - if targetPath == "" { - c.RespondError( - http.StatusBadRequest, - model.ErrorCodeInvalidFileMetadata, - "metadata path is empty", - ) - return - } - resolvedPath, err := pathutil.ExpandPath(targetPath) - if err != nil { - c.RespondError( - http.StatusInternalServerError, - model.ErrorCodeRuntimeError, - fmt.Sprintf("error resolving target path %s. %v", targetPath, err), - ) - return - } + return applyUploadPermission(resolvedPath, meta.Permission) +} - targetDir := filepath.Dir(resolvedPath) - if err := os.MkdirAll(targetDir, os.ModePerm); err != nil { - c.RespondError( - http.StatusInternalServerError, - model.ErrorCodeRuntimeError, - fmt.Sprintf("error creating target directory %s. %v", targetDir, err), - ) - return - } +func parseUploadMetadata(header *multipart.FileHeader) (*model.FileMetadata, *uploadError) { + metadataFile, err := header.Open() + if err != nil { + return nil, newUploadError( + http.StatusBadRequest, + model.ErrorCodeInvalidFileMetadata, + fmt.Sprintf("error opening metadata file. %v", err), + ) + } + metaBytes, err := io.ReadAll(metadataFile) + metadataFile.Close() + if err != nil { + return nil, newUploadError( + http.StatusBadRequest, + model.ErrorCodeInvalidFileMetadata, + fmt.Sprintf("error reading metadata content. %v", err), + ) + } - fileHeader := fileParts[i] - file, err := fileHeader.Open() - if err != nil { - c.RespondError( - http.StatusInternalServerError, - model.ErrorCodeRuntimeError, - fmt.Sprintf("error opening file %s. %v", fileHeader.Filename, err), - ) - return - } + var meta model.FileMetadata + if err := json.Unmarshal(metaBytes, &meta); err != nil { + return nil, newUploadError( + http.StatusBadRequest, + model.ErrorCodeInvalidFileMetadata, + fmt.Sprintf("invalid metadata format. 
%v", err), + ) + } + if meta.Path == "" { + return nil, newUploadError(http.StatusBadRequest, model.ErrorCodeInvalidFileMetadata, "metadata path is empty") + } + return &meta, nil +} - dst, err := os.OpenFile(resolvedPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) - if err != nil { - file.Close() - c.RespondError( - http.StatusInternalServerError, - model.ErrorCodeRuntimeError, - fmt.Sprintf("error opening destination file %s. %v", resolvedPath, err), - ) - return - } +func resolveUploadTarget(targetPath string) (string, *uploadError) { + resolvedPath, err := pathutil.ExpandPath(targetPath) + if err != nil { + return "", newUploadError( + http.StatusInternalServerError, + model.ErrorCodeRuntimeError, + fmt.Sprintf("error resolving target path %s. %v", targetPath, err), + ) + } + targetDir := filepath.Dir(resolvedPath) + if err := os.MkdirAll(targetDir, os.ModePerm); err != nil { + return "", newUploadError( + http.StatusInternalServerError, + model.ErrorCodeRuntimeError, + fmt.Sprintf("error creating target directory %s. %v", targetDir, err), + ) + } + return resolvedPath, nil +} - if _, err := io.Copy(dst, file); err != nil { - dst.Close() - file.Close() - c.RespondError( - http.StatusInternalServerError, - model.ErrorCodeRuntimeError, - fmt.Sprintf("error copying file %s. %v", resolvedPath, err), - ) - return - } +func writeUploadFile(resolvedPath string, fileHeader *multipart.FileHeader) *uploadError { + file, err := fileHeader.Open() + if err != nil { + return newUploadError( + http.StatusInternalServerError, + model.ErrorCodeRuntimeError, + fmt.Sprintf("error opening file %s. %v", fileHeader.Filename, err), + ) + } + defer file.Close() + + dst, err := os.OpenFile(resolvedPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, os.ModePerm) + if err != nil { + return newUploadError( + http.StatusInternalServerError, + model.ErrorCodeRuntimeError, + fmt.Sprintf("error opening destination file %s. 
%v", resolvedPath, err), + ) + } - if err := dst.Sync(); err != nil { - log.Error("failed to sync target file: %v", err) - } - if err := dst.Close(); err != nil { - log.Error("failed to close target file: %v", err) - } - file.Close() - - if err := ChmodFile(resolvedPath, meta.Permission); err != nil { - c.RespondError( - http.StatusInternalServerError, - model.ErrorCodeRuntimeError, - fmt.Sprintf("error chmoding file %s. %v", resolvedPath, err), - ) - return + if _, err := io.Copy(dst, file); err != nil { + dst.Close() + return newUploadError( + http.StatusInternalServerError, + model.ErrorCodeRuntimeError, + fmt.Sprintf("error copying file %s. %v", resolvedPath, err), + ) + } + + if err := dst.Sync(); err != nil { + log.Error("failed to sync target file: %v", err) + } + if err := dst.Close(); err != nil { + log.Error("failed to close target file: %v", err) + } + + // fsync parent directory so the new dirent is durable and visible on + // weakly-coherent filesystems (virtio-fs, 9pfs, etc.). Best-effort: + // some filesystems return ENOTSUP for directory fsync. + targetDir := filepath.Dir(resolvedPath) + if d, err := os.Open(targetDir); err == nil { + if err := d.Sync(); err != nil { + log.Warning("failed to sync parent dir %s: %v", targetDir, err) } + _ = d.Close() } + return nil +} - rec.MarkSuccess() - c.RespondSuccess(nil) +// applyUploadPermission applies the metadata permission with one retry to +// absorb metadata-propagation delay on weakly-coherent filesystems +// (virtio-fs, 9pfs). ChmodFile always invokes chown under the hood, so a +// freshly-created dirent that has not yet propagated will surface as ENOENT +// here even though the file is fully written and synced. 
+func applyUploadPermission(resolvedPath string, permission model.Permission) *uploadError { + chmodErr := ChmodFile(resolvedPath, permission) + if chmodErr != nil { + time.Sleep(20 * time.Millisecond) + chmodErr = ChmodFile(resolvedPath, permission) + } + if chmodErr != nil { + return newUploadError( + http.StatusInternalServerError, + model.ErrorCodeRuntimeError, + fmt.Sprintf("error chmoding file %s. %v", resolvedPath, chmodErr), + ) + } + return nil } diff --git a/components/execd/pkg/web/controller/sse.go b/components/execd/pkg/web/controller/sse.go index 573315260..29dddb9d2 100644 --- a/components/execd/pkg/web/controller/sse.go +++ b/components/execd/pkg/web/controller/sse.go @@ -36,13 +36,21 @@ var sseHeaders = map[string]string{ "X-Accel-Buffering": "no", } +// setupSSEResponse is idempotent: once headers are committed, subsequent calls +// no-op. Callers that need the headers up front (e.g. long-running streaming +// endpoints with no early-error path) can call it explicitly. Endpoints that +// may fail synchronously before any event fires should leave header commit to +// the lazy path inside writeSingleEvent so pre-execution errors can return a +// proper JSON body instead of a half-formed text/event-stream response. func (c *basicController) setupSSEResponse() { - for key, value := range sseHeaders { - c.ctx.Writer.Header().Set(key, value) - } - if flusher, ok := c.ctx.Writer.(http.Flusher); ok { - flusher.Flush() - } + c.sseSetupOnce.Do(func() { + for key, value := range sseHeaders { + c.ctx.Writer.Header().Set(key, value) + } + if flusher, ok := c.ctx.Writer.(http.Flusher); ok { + flusher.Flush() + } + }) } // setServerEventsHandler adapts runtime callbacks to SSE events. @@ -167,6 +175,10 @@ func (c *CodeInterpretingController) writeSingleEvent(handler string, data []byt c.chunkWriter.Lock() defer c.chunkWriter.Unlock() + // Lazily commit SSE response headers on the first event. 
This lets the + // surrounding handler return a proper JSON error via RespondError if the + // runtime fails synchronously before any event fires. + c.setupSSEResponse() defer func() { if flusher, ok := c.ctx.Writer.(http.Flusher); ok { flusher.Flush() diff --git a/components/execd/tests/smoke_api.py b/components/execd/tests/smoke_api.py index df9f96f6e..36a3cdca8 100644 --- a/components/execd/tests/smoke_api.py +++ b/components/execd/tests/smoke_api.py @@ -91,6 +91,58 @@ def fetch_logs(cmd_id: str, cursor: int = 0): return r.text, r.headers.get("EXECD-COMMANDS-TAIL-CURSOR") +def run_command_blank_lines(): + """ + Foreground command whose stdout contains consecutive newlines must surface + blank-line events instead of dropping them. Regression test for the + readFromPos fix that preserves empty lines (a\n\nb -> ["a", "\n", "b"]). + """ + url = f"{BASE_URL}/command" + # Pick a shell-native command per platform so the regression covers both + # POSIX (LF-only) and Windows cmd (CRLF) byte streams without depending on + # Git for Windows / MSYS argv mangling. The execd reader collapses CRLF to + # LF, so both produce ["a", "\n", "b", "\n", "\n", "c"]. + if os.name == "nt": + # cmd /C echo chain: each segment writes "\r\n"; "echo." writes + # a bare "\r\n". Order is deterministic because "&" is sequential. 
+ command = "echo a&echo.&echo b&echo.&echo.&echo c" + else: + # printf emits exact bytes: a\n\nb\n\n\nc\n + command = "printf 'a\\n\\nb\\n\\n\\nc\\n'" + payload = { + "command": command, + "background": False, + } + + stdout_texts = [] + saw_complete = False + with session.post(url, json=payload, stream=True, timeout=15) as resp: + expect(resp.status_code == 200, f"SSE start failed: {resp.status_code} {resp.text}") + for line in resp.iter_lines(): + if not line: + continue + try: + if line.startswith(b"data:"): + data = json.loads(line[len(b"data:") :].decode()) + else: + data = json.loads(line.decode()) + except Exception: + continue + event_type = data.get("type") + if event_type == "stdout": + stdout_texts.append(data.get("text", "")) + elif event_type == "execution_complete": + saw_complete = True + break + + expect(saw_complete, "did not observe execution_complete") + want = ["a", "\n", "b", "\n", "\n", "c"] + expect( + stdout_texts == want, + f"blank-line stdout sequence mismatch: got {stdout_texts!r}, want {want!r}", + ) + + def sse_disconnect_should_stop_ping(): """ Open an SSE stream for a long-running command, receive init, then close the @@ -248,6 +300,9 @@ def main(): sse_disconnect_should_stop_ping() print("[+] SSE disconnect handled") + run_command_blank_lines() + print("[+] run_command preserves blank lines") + cmd_id = sse_get_command_id() print(f"[+] command id: {cmd_id}") diff --git a/examples/README.md b/examples/README.md index 59fd8087f..42e4eb8c3 100644 --- a/examples/README.md +++ b/examples/README.md @@ -21,6 +21,7 @@ Examples for common OpenSandbox use cases. 
Each subdirectory contains runnable c - 🦞 [**nullclaw**](nullclaw): Launch a Nullclaw Gateway inside a sandbox - 🦞 [**openclaw**](openclaw): Run an OpenClaw Gateway inside a sandbox - 🖥️ [**desktop**](desktop): Launch VNC desktop (Xvfb + x11vnc) for VNC client connections +- 🪟 [**windows**](windows): Run a Windows guest VM via KVM/QEMU with RDP and web console access - Playwright [**playwright**](playwright): Launch headless browser (Playwright + Chromium) to scrape web content - VS Code [**vscode**](vscode): Launch code-server (VS Code Web) to provide browser access - Google Chrome [**chrome**](chrome): Launch headless Chromium with DevTools port exposed for remote debugging diff --git a/examples/agent-sandbox/README.md b/examples/agent-sandbox/README.md index 8b6eb0d64..c93b9ef95 100644 --- a/examples/agent-sandbox/README.md +++ b/examples/agent-sandbox/README.md @@ -23,7 +23,7 @@ opensandbox-server init-config ~/.sandbox.toml --example docker ```toml [runtime] type = "kubernetes" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" [kubernetes] namespace = "default" diff --git a/examples/code-interpreter/README.md b/examples/code-interpreter/README.md index 7e2fed488..eb689dfe2 100644 --- a/examples/code-interpreter/README.md +++ b/examples/code-interpreter/README.md @@ -104,7 +104,7 @@ spec: - name: opensandbox-bin mountPath: /opt/opensandbox/bin - name: execd-installer - image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15 + image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18 command: [ "/bin/sh", "-c" ] args: - | diff --git a/examples/host-volume-mount/README.md b/examples/host-volume-mount/README.md index 60fbaeb85..a6ad88372 100644 --- a/examples/host-volume-mount/README.md +++ b/examples/host-volume-mount/README.md @@ -224,4 +224,4 @@ Sandbox sandbox = Sandbox.builder() - [OSEP-0003: Volume and VolumeBinding 
Support](../../oseps/0003-volume-and-volumebinding-support.md) — Design proposal - [Sandbox Lifecycle API Spec](../../specs/sandbox-lifecycle.yml) — OpenAPI schema for volume definitions -- [Server Configuration](../../server/example.config.toml) — `[storage]` section for `allowed_host_paths` +- [Server Configuration](../../server/opensandbox_server/examples/example.config.toml) — `[storage]` section for `allowed_host_paths` diff --git a/examples/host-volume-mount/README_zh.md b/examples/host-volume-mount/README_zh.md index 86d7bf414..fce9ccb68 100644 --- a/examples/host-volume-mount/README_zh.md +++ b/examples/host-volume-mount/README_zh.md @@ -233,4 +233,4 @@ Sandbox sandbox = Sandbox.builder() - [OSEP-0003: Volume 与 VolumeBinding 支持](../../oseps/0003-volume-and-volumebinding-support.md) — 设计提案 - [Sandbox Lifecycle API 规范](../../specs/sandbox-lifecycle.yml) — Volume 定义的 OpenAPI 规范 -- [服务端配置示例](../../server/example.config.zh.toml) — `[storage]` 段中的 `allowed_host_paths` 配置 +- [服务端配置示例](../../server/opensandbox_server/examples/example.config.zh.toml) — `[storage]` 段中的 `allowed_host_paths` 配置 diff --git a/examples/openclaw/main.py b/examples/openclaw/main.py index 52ab8b69c..e441d72ef 100644 --- a/examples/openclaw/main.py +++ b/examples/openclaw/main.py @@ -23,7 +23,8 @@ # Configuration defaults - can be overridden via environment variables -DEFAULT_SERVER = os.getenv("OPENCLAW_SERVER", "http://localhost:8080") +DEFAULT_SERVER = os.getenv("OPEN_SANDBOX_SERVER", "http://localhost:8080") +DEFAULT_API_KEY = os.getenv("OPEN_SANDBOX_API_KEY", "") DEFAULT_IMAGE = os.getenv("OPENCLAW_IMAGE", "ghcr.io/openclaw/openclaw:latest") DEFAULT_TIMEOUT = int(os.getenv("OPENCLAW_TIMEOUT", "3600")) DEFAULT_TOKEN = os.getenv("OPENCLAW_TOKEN", "dummy-token-for-sandbox") @@ -64,12 +65,14 @@ def check_openclaw(sbx: SandboxSync, port: int = DEFAULT_PORT) -> bool: def main() -> None: server = DEFAULT_SERVER + api_key = DEFAULT_API_KEY image = DEFAULT_IMAGE timeout_seconds = DEFAULT_TIMEOUT 
token = os.getenv("OPENCLAW_GATEWAY_TOKEN", DEFAULT_TOKEN) port = DEFAULT_PORT print(f"Creating openclaw sandbox with image={image} on OpenSandbox server {server}...") + print(f" API Key: {api_key[:16]}..." if len(api_key) > 16 else f" API Key: {api_key}") print(f" Token: {token[:16]}..." if len(token) > 16 else f" Token: {token}") print(f" Port: {port}") print(f" Timeout: {timeout_seconds}s") @@ -78,8 +81,8 @@ def main() -> None: image=image, timeout=timedelta(seconds=timeout_seconds), metadata={"example": "openclaw"}, - entrypoint=[f"node dist/index.js gateway --bind=lan --port {port} --allow-unconfigured --verbose"], - connection_config=ConnectionConfigSync(domain=server), + entrypoint=["node", "dist/index.js", "gateway", "--bind=lan", "--port", str(port), "--allow-unconfigured", "--verbose"], + connection_config=ConnectionConfigSync(domain=server, api_key=api_key), health_check=lambda sbx: check_openclaw(sbx, port), # env for openclaw env={ @@ -101,4 +104,4 @@ def main() -> None: print(f"Openclaw started finished. Please refer to {endpoint.endpoint}") if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/examples/windows/README.md b/examples/windows/README.md new file mode 100644 index 000000000..05afc0d51 --- /dev/null +++ b/examples/windows/README.md @@ -0,0 +1,182 @@ +# Windows Sandbox Example + +Run a Windows guest in an OpenSandbox sandbox via KVM/QEMU using the [`dockur/windows`](https://github.com/dockur/windows) image. + +## How it works + +OpenSandbox creates a Linux container running KVM/QEMU, which boots a Windows guest OS inside it. The Windows profile (`platform.os=windows`) automatically configures the required devices, capabilities, OEM scripts, and port mappings — you only need to specify `platform` and `resource` in the SDK call. + +## Prerequisites + +- OpenSandbox server running (e.g. 
`http://localhost:8080`) +- Host with `/dev/kvm` and `/dev/net/tun` present +- Server `storage.allowed_host_paths` configured for any host bind mounts + +## Start OpenSandbox server [local] + +```shell +uv pip install opensandbox-server +opensandbox-server init-config ~/.sandbox.toml --example docker +opensandbox-server +``` + +## Run the example + +```shell +uv pip install opensandbox +python main.py +``` + +The script will: + +1. Create a Windows sandbox with `dockurr/windows:latest` and Windows 11 +2. Wait until the sandbox is healthy (first boot can take several minutes) +3. Print the execd, RDP (3389), and web console (8006) endpoints +4. Execute a test command and print the output + +## Environment Variables + +- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`) +- `SANDBOX_API_KEY`: API key if your server requires authentication (optional for local) + +## Customization + +### Resource limits + +The Windows profile enforces minimum resources: **cpu >= 2, memory >= 4G, disk >= 64G**. The example uses 4 CPU, 8G RAM, and 64G disk. You can adjust these in the `main.py` `resource` dict. 
+ +### Persistent storage + +Bind a host directory to `/storage` for a persistent system disk (add to the `SandboxSync.create` call): + +```python +from opensandbox.models.sandboxes import Host, Volume + +volumes = [ + Volume( + name="win-storage", + host=Host(path="/data/opensandbox/windows-storage"), + mount_path="/storage", + read_only=False, + ), +] +``` + +### Local ISO + +Bind a Windows install ISO to `/boot.iso` to avoid repeated downloads: + +```python +volumes = [ + Volume( + name="win-iso", + host=Host(path="/data/iso/Win11_23H2.iso"), + mount_path="/boot.iso", + read_only=True, + ), +] +``` + +### Windows guest configuration + +Pass [dockur/windows environment variables](https://github.com/dockur/windows) through the `env` parameter: + +```python +env = { + "VERSION": "11l", + "USERNAME": "Docker", + "PASSWORD": "your-secure-password", + "LANGUAGE": "Chinese", + "REGION": "zh-CN", + "KEYBOARD": "zh-CN", +} +``` + +Do not manually set `CPU_CORES`, `RAM_SIZE`, or `DISK_SIZE` — they are derived from `resourceLimits` automatically. + +## Exposed ports + +| Port | Service | +|------|---------| +| 44772 | execd (sandbox execution API) | +| 8080 | HTTP service | +| 3389 | RDP (native Remote Desktop) | +| 8006 | Web console (noVNC) | + +## Troubleshooting + +- **`Unsupported platform.os 'windows'`**: Server build has no Windows profile; upgrade OpenSandbox server. +- **`INVALID_PARAMETER` for resourceLimits**: Ensure cpu >= 2, memory >= 4G, disk >= 64G. +- **Stays Pending a long time**: First Windows install is slow; check host resources and `/storage` space, increase `ready_timeout`. +- **Status Running but endpoint unreachable**: Verify endpoint resolution returns a valid address; check `USER_PORTS` if you need additional ports forwarded. + +### ENI CNI network issue (Alibaba Cloud ACK) + +On clusters using ENI-based CNIs (e.g. 
Alibaba Cloud ACK Terway in ENI mode), dockur/windows fails at startup with: + +``` +❯ ERROR: This container does not support host mode networking! +``` + +or: + +``` +❯ ERROR: Status 1 while: ethtool -i "$VM_NET_DEV" +``` + +**Root cause**: The image's `network.sh` uses `ethtool -i` to check the network interface. ENI interfaces have real PCI bus-info, which triggers a false "host mode" detection. Standard veth-based CNIs (Calico, Flannel, Cilium) do NOT have this problem. + +**Solution**: Use the provided `main_fix_net.py` example, which patches the script at runtime and sets `NETWORK=slirp` for QEMU user-mode NAT: + +```shell +python main_fix_net.py +``` + +See [`main_fix_net.py`](./main_fix_net.py) for the full implementation. + +**How it works**: + +1. `sed` replaces three lines in `/run/network.sh` with empty variable assignments (`result=""`, `nic=""`, `bus=""`), preventing the ethtool check from aborting the script. +2. `NETWORK=slirp` tells the script to use QEMU's SLIRP networking (user-mode NAT), which doesn't require a real NIC. +3. `exec /usr/bin/tini -s /run/entry.sh` launches the original image entrypoint after patching. + +This approach keeps the Pod's independent IP and requires no image rebuild or `hostNetwork`. + +## Windows Sandbox from pool + +Use a pre-warmed K8s pool for faster Windows sandbox startup. + +### 1. Create the pool + +Apply the pool manifest (the image, resources, device mounts, and OEM scripts are pre-configured): + +```shell +kubectl apply -f pool-win-example.yaml +``` + +### 2. Start the OpenSandbox server [k8s] + +```shell +uv pip install opensandbox-server +opensandbox-server init-config ~/.sandbox.toml --example k8s +opensandbox-server +``` + +### 3. Run the pool example + +```shell +uv pip install opensandbox +python main_use_pool.py +``` + +The script acquires a sandbox from `pool-win-example`, prints endpoints, and runs a command. 
+ +### Environment variables (pool) + +- `SANDBOX_DOMAIN`: Sandbox service address (default: `localhost:8080`) +- `SANDBOX_API_KEY`: API key if your server requires authentication + +## References + +- [Windows sandbox guide](../../docs/windows-sandbox.md) +- [dockur/windows](https://github.com/dockur/windows) diff --git a/examples/windows/main.py b/examples/windows/main.py new file mode 100644 index 000000000..e5075aadf --- /dev/null +++ b/examples/windows/main.py @@ -0,0 +1,61 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Minimal Windows sandbox example using dockur/windows.""" + +import os +from datetime import timedelta + +from opensandbox import SandboxSync +from opensandbox.config import ConnectionConfigSync +from opensandbox.models.sandboxes import PlatformSpec + + +def main() -> None: + cfg = ConnectionConfigSync( + domain=os.getenv("SANDBOX_DOMAIN", "localhost:8080"), + api_key=os.getenv("SANDBOX_API_KEY") or None, + request_timeout=timedelta(minutes=3), + use_server_proxy=True, + ) + + sbx = SandboxSync.create( + image="dockurr/windows:latest", + timeout=timedelta(hours=12), + ready_timeout=timedelta(minutes=30), + resource={ + "cpu": "4", + "memory": "8G", + "disk": "64G", + }, + env={"VERSION": "11"}, + platform=PlatformSpec(os="windows", arch="amd64"), + connection_config=cfg, + ) + + try: + print(f"Created: {sbx.id}") + print(f"execd: {sbx.get_endpoint(44772).endpoint}") + print(f"RDP: {sbx.get_endpoint(3389).endpoint}") + print(f"Web: {sbx.get_endpoint(8006).endpoint}") + + exec = sbx.commands.run("cmd /c echo Hello from Windows sandbox") + print(f"Command output: {exec.logs.stdout[0].text}") + finally: + sbx.kill() + sbx.close() + + +if __name__ == "__main__": + main() diff --git a/examples/windows/main_fix_net.py b/examples/windows/main_fix_net.py new file mode 100644 index 000000000..179433cda --- /dev/null +++ b/examples/windows/main_fix_net.py @@ -0,0 +1,92 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Windows sandbox example with ENI CNI network fix. + +Use this example on clusters with ENI-based CNIs (e.g. Alibaba Cloud ACK +Terway in ENI mode) where dockur/windows fails with: + + ERROR: This container does not support host mode networking! + +or: + + ERROR: Status 1 while: ethtool -i "$VM_NET_DEV" + +The fix patches /run/network.sh at container startup to bypass the +ethtool/bus-info check, then uses NETWORK=slirp for QEMU user-mode NAT. +Standard veth-based CNIs (Calico, Flannel, Cilium) do NOT need this fix. +""" + +import os +from datetime import timedelta + +from opensandbox import SandboxSync +from opensandbox.config import ConnectionConfigSync +from opensandbox.models.sandboxes import PlatformSpec + +# sed command to bypass the ethtool/grep checks in network.sh. +# Replaces three lines with empty variable assignments so that: +# - ethtool -i (would fail on ENI with real PCI bus-info) is skipped +# - grep on empty result (would fail with pipefail) is skipped +_NETWORK_PATCH_CMD = ( + "sed -i" + " -e 's/result=$(ethtool -i \"$VM_NET_DEV\")/result=\"\"/'" + " -e '/grep.*driver:/s/.*/ nic=\"\"/'" + " -e '/grep.*bus-info:/s/.*/ bus=\"\"/'" + " /run/network.sh" +) + +# Original dockur/windows ENTRYPOINT +_WINDOWS_ENTRYPOINT = "/usr/bin/tini -s /run/entry.sh" + + +def main() -> None: + cfg = ConnectionConfigSync( + domain=os.getenv("SANDBOX_DOMAIN", "localhost:8080"), + api_key=os.getenv("SANDBOX_API_KEY") or None, + request_timeout=timedelta(minutes=3), + use_server_proxy=True, + ) + + sbx = SandboxSync.create( + image="dockurr/windows:latest", + timeout=timedelta(hours=12), + ready_timeout=timedelta(minutes=120), + resource={"cpu": "8", "memory": "16G", "disk": "64G"}, + env={ + "VERSION": "11", + "NETWORK": "slirp", # Use QEMU built-in user-mode NAT + }, + # Patch network.sh then exec the original entrypoint + entrypoint=["/bin/sh", "-c", f"{_NETWORK_PATCH_CMD} && exec {_WINDOWS_ENTRYPOINT}"], + platform=PlatformSpec(os="windows", arch="amd64"), + 
connection_config=cfg, + ) + + try: + print(f"Created: {sbx.id}") + print(f"execd: {sbx.get_endpoint(44772).endpoint}") + print(f"RDP: {sbx.get_endpoint(3389).endpoint}") + print(f"Web: {sbx.get_endpoint(8006).endpoint}") + + result = sbx.commands.run("cmd /c echo Hello from Windows sandbox") + print(f"Command output: {result.logs.stdout[0].text}") + finally: + sbx.kill() + sbx.close() + + +if __name__ == "__main__": + main() diff --git a/examples/windows/main_use_pool.py b/examples/windows/main_use_pool.py new file mode 100644 index 000000000..8956ad114 --- /dev/null +++ b/examples/windows/main_use_pool.py @@ -0,0 +1,58 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Windows sandbox example using a pre-warmed K8s pool.""" + +import os +from datetime import timedelta + +from opensandbox import SandboxSync +from opensandbox.config import ConnectionConfigSync + + +def main() -> None: + cfg = ConnectionConfigSync( + domain=os.getenv("SANDBOX_DOMAIN", "localhost:8080"), + api_key=os.getenv("SANDBOX_API_KEY") or None, + request_timeout=timedelta(minutes=3), + use_server_proxy=True, + ) + + # Note: do NOT set entrypoint or env for Windows pool sandboxes. + # The pool template already configures the Windows guest (VERSION, + # CPU_CORES, etc.). Setting entrypoint or env would inject a + # taskTemplate that overrides the pool's pod spec, preventing + # dockur/windows from booting correctly. 
+ sbx = SandboxSync.create( + image="dockurr/windows:latest", + timeout=timedelta(hours=1), + extensions={"poolRef": "pool-win-example"}, + connection_config=cfg, + ) + + try: + print(f"Created: {sbx.id}") + print(f"execd: {sbx.get_endpoint(44772).endpoint}") + print(f"RDP: {sbx.get_endpoint(3389).endpoint}") + print(f"Web: {sbx.get_endpoint(8006).endpoint}") + + exec = sbx.commands.run("cmd /c echo Hello from Windows sandbox") + print(f"Command output: {exec.logs.stdout[0].text}") + finally: + sbx.kill() + sbx.close() + + +if __name__ == "__main__": + main() diff --git a/examples/windows/pool-win-example.yaml b/examples/windows/pool-win-example.yaml new file mode 100644 index 000000000..45283d992 --- /dev/null +++ b/examples/windows/pool-win-example.yaml @@ -0,0 +1,92 @@ +apiVersion: sandbox.opensandbox.io/v1alpha1 +kind: Pool +metadata: + labels: + app.kubernetes.io/name: sandbox-k8s + app.kubernetes.io/managed-by: kustomize + name: pool-win-example + namespace: opensandbox +spec: + template: + metadata: + labels: + app: example + spec: + containers: + - env: + - name: VERSION + value: "11" + - name: CPU_CORES + value: "8" + - name: RAM_SIZE + value: 16G + - name: DISK_SIZE + value: 64G + - name: USER_PORTS + value: 44772,8080,3389,8006 + image: dockurr/windows:latest + imagePullPolicy: IfNotPresent + name: sandbox + resources: + limits: + cpu: "8" + memory: 18Gi + requests: + cpu: "8" + memory: 18Gi + securityContext: + capabilities: + add: + - NET_ADMIN + - NET_RAW + privileged: true + volumeMounts: + - mountPath: /opt/opensandbox/bin + name: opensandbox-bin + - mountPath: /oem + name: opensandbox-win-oem + - mountPath: /dev/kvm + name: opensandbox-win-kvm + - mountPath: /dev/net/tun + name: opensandbox-win-tun + - mountPath: /storage + name: opensandbox-win-storage + initContainers: + - args: + - cp ./install.bat /oem/install.bat && cp ./execd.exe /oem/execd.exe && chmod + 0644 /oem/install.bat /oem/execd.exe + command: + - /bin/sh + - -c + image: 
sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18 + name: execd-installer + volumeMounts: + - mountPath: /opt/opensandbox/bin + name: opensandbox-bin + - mountPath: /oem + name: opensandbox-win-oem + nodeSelector: + kubernetes.io/arch: amd64 + restartPolicy: Always + tolerations: + - operator: Exists + volumes: + - emptyDir: {} + name: opensandbox-bin + - emptyDir: {} + name: opensandbox-win-oem + - hostPath: + path: /dev/kvm + type: CharDevice + name: opensandbox-win-kvm + - hostPath: + path: /dev/net/tun + type: CharDevice + name: opensandbox-win-tun + - emptyDir: {} + name: opensandbox-win-storage + capacitySpec: + bufferMax: 3 + bufferMin: 1 + poolMax: 5 + poolMin: 0 \ No newline at end of file diff --git a/kubernetes/Dockerfile b/kubernetes/Dockerfile index b2026411c..8ce31232f 100644 --- a/kubernetes/Dockerfile +++ b/kubernetes/Dockerfile @@ -48,6 +48,8 @@ COPY internal/ internal/ # by leaving it empty we can ensure that the container and binary shipped on it will have the same platform. 
RUN echo "Building for $TARGETOS/$TARGETARCH" ARG PACKAGE=./cmd/controller +ARG COMMIT_ID=unknown +ARG BUILD_DATE=unknown RUN if [ -n "${CC}" ]; then export CC; fi; \ if [ -n "${CXX}" ]; then export CXX; fi; \ export CGO_ENABLED="${CGO_ENABLED}" GOOS="${TARGETOS:-linux}" GOARCH="${TARGETARCH}" \ @@ -55,7 +57,7 @@ RUN if [ -n "${CC}" ]; then export CC; fi; \ CGO_CXXFLAGS="${CGO_CXXFLAGS:-${CXXFLAGS}}" \ CGO_LDFLAGS="${CGO_LDFLAGS}"; \ go build ${GOFLAGS} -trimpath -buildvcs=false \ - -ldflags "${LDFLAGS} -buildid= -B none" \ + -ldflags "${LDFLAGS} -buildid= -B none -X main.commitID=${COMMIT_ID} -X main.buildDate=${BUILD_DATE}" \ -o server ${PACKAGE} # Use golang image as base to ensure nsenter (util-linux) is available diff --git a/kubernetes/Makefile b/kubernetes/Makefile index 232a2346e..7357ceecd 100644 --- a/kubernetes/Makefile +++ b/kubernetes/Makefile @@ -56,6 +56,8 @@ CONTROLLER_IMG ?= controller:dev TASK_EXECUTOR_IMG ?= task-executor:dev # IMAGE_COMMITTER_IMG defines the image for the image-committer service. IMAGE_COMMITTER_IMG ?= image-committer:dev +# SNAPSHOT_REGISTRY defines the OCI registry used by the controller for snapshot images. +SNAPSHOT_REGISTRY ?= docker-registry.default.svc.cluster.local:5000 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) ifeq (,$(shell go env GOBIN)) @@ -364,7 +366,10 @@ uninstall: manifests kustomize ## Uninstall CRDs from the K8s cluster specified .PHONY: deploy deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config. 
cd config/manager && $(KUSTOMIZE) edit set image controller=${CONTROLLER_IMG} - $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - + $(KUSTOMIZE) build config/default | \ + sed 's|--snapshot-registry=docker-registry.default.svc.cluster.local:5000|--snapshot-registry=$(SNAPSHOT_REGISTRY)|' | \ + sed 's|--image-committer-image=image-committer:dev|--image-committer-image=$(IMAGE_COMMITTER_IMG)|' | \ + $(KUBECTL) apply -f - .PHONY: undeploy undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. diff --git a/kubernetes/build.sh b/kubernetes/build.sh index 741e32a4f..f2d3234ee 100755 --- a/kubernetes/build.sh +++ b/kubernetes/build.sh @@ -31,21 +31,28 @@ BUILD_ARGS=() for name in GOFLAGS LDFLAGS CGO_ENABLED CC CXX CFLAGS CXXFLAGS CGO_CFLAGS CGO_CXXFLAGS CGO_LDFLAGS; do build_arg_if_set "${name}" done +BUILD_ARGS+=(--build-arg "COMMIT_ID=$(git rev-parse --short HEAD)") +BUILD_ARGS+=(--build-arg "BUILD_DATE=$(date -u +%Y-%m-%dT%H:%M:%SZ)") mkdir -p "$(dirname "${BUILD_METADATA_FILE}")" DOCKERHUB_REPO="opensandbox" ACR_REPO="sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox" # Component specific settings +DOCKERFILE="Dockerfile" if [ "$COMPONENT" == "controller" ]; then IMAGE_NAME="controller" BUILD_ARG="--build-arg PACKAGE=./cmd/controller" elif [ "$COMPONENT" == "task-executor" ]; then IMAGE_NAME="task-executor" BUILD_ARG="--build-arg PACKAGE=cmd/task-executor/main.go --build-arg USERID=0" +elif [ "$COMPONENT" == "image-committer" ]; then + IMAGE_NAME="image-committer" + BUILD_ARG="" + DOCKERFILE="Dockerfile.image-committer" else echo "Error: Unknown component: $COMPONENT" - echo "Available components: controller, task-executor" + echo "Available components: controller, task-executor, image-committer" exit 1 fi @@ -69,7 +76,7 @@ if [ "$PUSH" == "true" ]; then -t "${ACR_REPO}/${IMAGE_NAME}:${TAG}" \ --metadata-file 
"${BUILD_METADATA_FILE}" \ --push \ - -f Dockerfile \ + -f "$DOCKERFILE" \ . echo "=========================================" @@ -84,7 +91,7 @@ else $BUILD_ARG \ "${BUILD_ARGS[@]}" \ -t ${IMAGE_NAME}:${TAG} \ - -f Dockerfile \ + -f "$DOCKERFILE" \ --load \ . diff --git a/kubernetes/charts/opensandbox-controller/Chart.yaml b/kubernetes/charts/opensandbox-controller/Chart.yaml index cbb32533c..b3711fa4e 100644 --- a/kubernetes/charts/opensandbox-controller/Chart.yaml +++ b/kubernetes/charts/opensandbox-controller/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: opensandbox-controller description: A Kubernetes operator for managing sandbox environments with resource pooling and batch delivery type: application -version: 0.1.0 -appVersion: "0.1.0" +version: 0.2.0 +appVersion: "0.2.0" keywords: - sandbox diff --git a/kubernetes/charts/opensandbox-controller/README.md b/kubernetes/charts/opensandbox-controller/README.md index 644e073e3..990eb017e 100644 --- a/kubernetes/charts/opensandbox-controller/README.md +++ b/kubernetes/charts/opensandbox-controller/README.md @@ -164,7 +164,7 @@ The chart exposes the snapshot-related settings below: ```yaml controller: snapshot: - imageCommitterImage: my-registry/image-committer:v1.0.0 + imageCommitterImage: my-registry/image-committer:v0.1.0 commitJobTimeout: 15m registry: my-registry/snapshots registryInsecure: false diff --git a/kubernetes/charts/opensandbox-controller/templates/deployment.yaml b/kubernetes/charts/opensandbox-controller/templates/deployment.yaml index a94ecbe45..bbbe7a4cc 100644 --- a/kubernetes/charts/opensandbox-controller/templates/deployment.yaml +++ b/kubernetes/charts/opensandbox-controller/templates/deployment.yaml @@ -61,6 +61,9 @@ spec: {{- if .Values.controller.snapshot.imageCommitterImage }} - --image-committer-image={{ .Values.controller.snapshot.imageCommitterImage }} {{- end }} + {{- if .Values.controller.snapshot.containerdSocketPath }} + - --containerd-socket-path={{ 
.Values.controller.snapshot.containerdSocketPath }} + {{- end }} {{- if .Values.controller.snapshot.commitJobTimeout }} - --commit-job-timeout={{ .Values.controller.snapshot.commitJobTimeout }} {{- end }} diff --git a/kubernetes/charts/opensandbox-controller/values.yaml b/kubernetes/charts/opensandbox-controller/values.yaml index dd969a298..97a35dc3c 100644 --- a/kubernetes/charts/opensandbox-controller/values.yaml +++ b/kubernetes/charts/opensandbox-controller/values.yaml @@ -47,7 +47,10 @@ controller: # -- Pause/Resume snapshot configuration snapshot: # -- Image used for commit operations (must contain nerdctl tool) - imageCommitterImage: "image-committer:dev" + # DockerHub: opensandbox/image-committer:v0.1.0 + imageCommitterImage: "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/image-committer:v0.1.0" + # -- Containerd socket path of host + containerdSocketPath: "/var/run/containerd/containerd.sock" # -- Timeout duration for commit jobs commitJobTimeout: "10m" # -- OCI registry prefix used for snapshot images. diff --git a/kubernetes/charts/opensandbox-server/templates/server.yaml b/kubernetes/charts/opensandbox-server/templates/server.yaml index b2b5def22..2dcf1d7fa 100644 --- a/kubernetes/charts/opensandbox-server/templates/server.yaml +++ b/kubernetes/charts/opensandbox-server/templates/server.yaml @@ -114,6 +114,9 @@ spec: mountPath: /etc/opensandbox/config.toml subPath: config.toml readOnly: true + {{- with .Values.server.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} livenessProbe: httpGet: path: /health @@ -134,6 +137,9 @@ spec: - name: config configMap: name: {{ include "opensandbox-server.fullname" . }}-config + {{- with .Values.server.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} {{- with .Values.server.affinity }} affinity: {{- toYaml . 
| nindent 8 }} diff --git a/kubernetes/charts/opensandbox-server/values.yaml b/kubernetes/charts/opensandbox-server/values.yaml index 58a90bc4f..afdde64de 100644 --- a/kubernetes/charts/opensandbox-server/values.yaml +++ b/kubernetes/charts/opensandbox-server/values.yaml @@ -25,7 +25,7 @@ server: # -- Server image configuration image: repository: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/server - tag: "v0.1.13" + tag: "v0.1.14" # -- Number of server replicas replicaCount: 2 @@ -41,6 +41,8 @@ server: tolerations: [] affinity: {} + volumeMounts: [] + volumes: [] # Gateway (components/ingress): when enabled, writes config [ingress] and deploys the gateway gateway: @@ -83,7 +85,7 @@ configToml: | [runtime] type = "kubernetes" - execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15" + execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18" [kubernetes] kubeconfig_path = "" @@ -96,5 +98,5 @@ configToml: | batchsandbox_template_file = "/etc/opensandbox/example.batchsandbox-template.yaml" [egress] - image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.11" + image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.12" mode = "dns+nft" diff --git a/kubernetes/charts/opensandbox/Chart.lock b/kubernetes/charts/opensandbox/Chart.lock index c455f5efc..eb1c59e50 100644 --- a/kubernetes/charts/opensandbox/Chart.lock +++ b/kubernetes/charts/opensandbox/Chart.lock @@ -1,9 +1,9 @@ dependencies: - name: opensandbox-controller repository: file://../opensandbox-controller - version: 0.1.0 + version: 0.2.0 - name: opensandbox-server repository: file://../opensandbox-server version: 0.1.0 -digest: sha256:c66976fab3f4eea75ec3004c1842079d754387ea430c56cce5514e4f457ee40c -generated: "2026-03-04T17:56:49.467373+08:00" +digest: sha256:b88aa0bfffb5e30aa46163794cb74aa25157ee4b4a437c22867df19633b2a89f +generated: "2026-05-15T14:39:18.571067+08:00" diff --git 
a/kubernetes/charts/opensandbox/Chart.yaml b/kubernetes/charts/opensandbox/Chart.yaml index 77842db5b..714d85699 100644 --- a/kubernetes/charts/opensandbox/Chart.yaml +++ b/kubernetes/charts/opensandbox/Chart.yaml @@ -16,8 +16,8 @@ apiVersion: v2 name: opensandbox description: All-in-one Helm chart for deploying OpenSandbox controller and server type: application -version: 0.1.0 -appVersion: "0.1.0" +version: 0.2.0 +appVersion: "0.2.0" keywords: - sandbox @@ -40,7 +40,7 @@ kubeVersion: ">=1.21.1-0" dependencies: - name: opensandbox-controller - version: "0.1.0" + version: "0.2.0" repository: "file://../opensandbox-controller" - name: opensandbox-server version: "0.1.0" diff --git a/kubernetes/charts/opensandbox/values.yaml b/kubernetes/charts/opensandbox/values.yaml index b20b88bcf..6a9c968d4 100644 --- a/kubernetes/charts/opensandbox/values.yaml +++ b/kubernetes/charts/opensandbox/values.yaml @@ -9,7 +9,8 @@ opensandbox-controller: logLevel: info replicaCount: 1 snapshot: - imageCommitterImage: image-committer:dev + # DockerHub: opensandbox/image-committer:v0.1.0 + imageCommitterImage: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/image-committer:v0.1.0 commitJobTimeout: 10m registry: "" registryInsecure: false diff --git a/kubernetes/cmd/controller/main.go b/kubernetes/cmd/controller/main.go index b82234e83..2eccbaf78 100644 --- a/kubernetes/cmd/controller/main.go +++ b/kubernetes/cmd/controller/main.go @@ -43,11 +43,17 @@ import ( "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/controller" poolassign "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/controller/poolassign" cryptoutil "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/crypto" + "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/expectations" "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex" "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/logging" // +kubebuilder:scaffold:imports ) +var ( + commitID = "unknown" + buildDate = 
"unknown" +) + const ( defaultBatchSandboxConcurrency = 32 defaultPoolConcurrency = 16 @@ -198,6 +204,9 @@ func main() { var imageCommitterImage string flag.StringVar(&imageCommitterImage, "image-committer-image", "image-committer:dev", "The image used for commit operations (contains nerdctl tool).") + var containerdSocketPath string + flag.StringVar(&containerdSocketPath, "containerd-socket-path", controller.ContainerdSocketPath, "Containerd socket path") + // Commit job timeout var commitJobTimeout time.Duration flag.DurationVar(&commitJobTimeout, "commit-job-timeout", 10*time.Minute, "The timeout duration for commit jobs.") @@ -234,6 +243,8 @@ func main() { logger := logging.NewLoggerWithZapOptions(logOpts) ctrl.SetLogger(logger) + setupLog.Info("Starting controller", "commitID", commitID, "buildDate", buildDate) + // if the enable-http2 flag is false (the default), http/2 should be disabled // due to its vulnerabilities. More specifically, disabling http/2 will // prevent from being vulnerable to the HTTP/2 Stream Cancellation and @@ -372,11 +383,12 @@ func main() { } config := ctrl.GetConfigOrDie() + config.UserAgent = "sandbox-k8s-controller/1.0" // Set client rate limiter if specified - if kubeClientQPS > 0 { + if kubeClientQPS != 0 { config.QPS = float32(kubeClientQPS) } - if kubeClientBurst > 0 { + if kubeClientBurst != 0 { config.Burst = kubeClientBurst } @@ -419,11 +431,12 @@ func main() { } if err := (&controller.BatchSandboxReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - Recorder: mgr.GetEventRecorderFor("batchsandbox-controller"), - ResumePullSecret: resumePullSecret, - ProfileStore: profileStore, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("batchsandbox-controller"), + ResumePullSecret: resumePullSecret, + ProfileStore: profileStore, + StatusRVExpectation: expectations.NewResourceVersionExpectation(), }).SetupWithManager(mgr, batchSandboxConcurrency); err != nil { setupLog.Error(err, 
"unable to create controller", "controller", "BatchSandbox") os.Exit(1) @@ -443,6 +456,7 @@ func main() { Scheme: mgr.GetScheme(), Recorder: mgr.GetEventRecorderFor("sandboxsnapshot-controller"), ImageCommitterImage: imageCommitterImage, + ContainerdSocketPath: containerdSocketPath, CommitJobTimeout: commitJobTimeout, SnapshotRegistry: snapshotRegistry, SnapshotRegistryInsecure: snapshotRegistryInsecure, diff --git a/kubernetes/config/manager/manager.yaml b/kubernetes/config/manager/manager.yaml index 34190590d..ba904c456 100644 --- a/kubernetes/config/manager/manager.yaml +++ b/kubernetes/config/manager/manager.yaml @@ -67,6 +67,7 @@ spec: - --snapshot-registry-insecure=true - --snapshot-push-secret=registry-snapshot-push-secret - --resume-pull-secret=registry-pull-secret + - --image-committer-image=image-committer:dev image: controller:dev name: manager ports: [] diff --git a/kubernetes/config/samples/sandbox_v1alpha1_pool.yaml b/kubernetes/config/samples/sandbox_v1alpha1_pool.yaml index b4d8ebb7a..6dc7b04ee 100644 --- a/kubernetes/config/samples/sandbox_v1alpha1_pool.yaml +++ b/kubernetes/config/samples/sandbox_v1alpha1_pool.yaml @@ -31,7 +31,7 @@ spec: - name: opensandbox-bin mountPath: /opt/opensandbox/bin - name: execd-installer - image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15 + image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18 command: [ "/bin/sh", "-c" ] args: - | diff --git a/kubernetes/config/samples/sandbox_v1alpha1_pool_restart.yaml b/kubernetes/config/samples/sandbox_v1alpha1_pool_restart.yaml index dc303b84c..55ae43ad9 100644 --- a/kubernetes/config/samples/sandbox_v1alpha1_pool_restart.yaml +++ b/kubernetes/config/samples/sandbox_v1alpha1_pool_restart.yaml @@ -56,7 +56,7 @@ spec: command: - /bin/sh - -c - image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15 + image: sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18 name: 
execd-installer volumeMounts: - mountPath: /opt/opensandbox/bin diff --git a/kubernetes/internal/controller/allocator.go b/kubernetes/internal/controller/allocator.go index 985b87e2c..436c22242 100644 --- a/kubernetes/internal/controller/allocator.go +++ b/kubernetes/internal/controller/allocator.go @@ -22,6 +22,7 @@ import ( "sync" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -268,24 +269,38 @@ func NewAnnoAllocationSyncer(client client.Client) AllocationSyncer { } func (syncer *annoAllocationSyncer) SetAllocation(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox, allocation *SandboxAllocation) error { - old, ok := sandbox.DeepCopyObject().(*sandboxv1alpha1.BatchSandbox) - if !ok { - return fmt.Errorf("invalid object") + js, err := json.Marshal(allocation) + if err != nil { + return err } anno := sandbox.GetAnnotations() if anno == nil { anno = make(map[string]string) } - js, err := json.Marshal(allocation) + anno[AnnoAllocStatusKey] = string(js) + sandbox.SetAnnotations(anno) + + needAddFinalizer := !controllerutil.ContainsFinalizer(sandbox, FinalizerPoolAllocation) + if needAddFinalizer { + sandbox.SetFinalizers(append(sandbox.GetFinalizers(), FinalizerPoolAllocation)) + } + + meta := map[string]any{ + "annotations": map[string]string{ + AnnoAllocStatusKey: string(js), + }, + } + if needAddFinalizer { + meta["finalizers"] = sandbox.GetFinalizers() + } + patchData, err := json.Marshal(map[string]any{"metadata": meta}) if err != nil { return err } - anno[AnnoAllocStatusKey] = string(js) - sandbox.SetAnnotations(anno) - // Add finalizer to ensure the sandbox is not deleted before all pods are recycled. 
- controllerutil.AddFinalizer(sandbox, FinalizerPoolAllocation) - patch := client.MergeFrom(old) - return syncer.client.Patch(ctx, sandbox, patch) + obj := &sandboxv1alpha1.BatchSandbox{} + obj.Name = sandbox.Name + obj.Namespace = sandbox.Namespace + return syncer.client.Patch(ctx, obj, client.RawPatch(types.MergePatchType, patchData)) } func (syncer *annoAllocationSyncer) GetAllocation(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox) (*SandboxAllocation, error) { @@ -340,20 +355,18 @@ func (syncer *annoAllocationSyncer) GetReleased(ctx context.Context, sandbox *sa } func (syncer *annoAllocationSyncer) SetReleased(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox, released *AllocationReleased) error { - old, ok := sandbox.DeepCopyObject().(*sandboxv1alpha1.BatchSandbox) - if !ok { - return fmt.Errorf("invalid object") + js, err := json.Marshal(released) + if err != nil { + return err } anno := sandbox.GetAnnotations() if anno == nil { anno = make(map[string]string) } - js, err := json.Marshal(released) - if err != nil { - return err - } anno[AnnoAllocReleasedKey] = string(js) sandbox.SetAnnotations(anno) + + needRemoveFinalizer := false // If the sandbox is being deleted and all allocated pods have been released, // remove the finalizer so the sandbox can be garbage collected. 
if !sandbox.DeletionTimestamp.IsZero() { @@ -372,12 +385,34 @@ func (syncer *annoAllocationSyncer) SetReleased(ctx context.Context, sandbox *sa break } } - if allReleased { - controllerutil.RemoveFinalizer(sandbox, FinalizerPoolAllocation) + if allReleased && controllerutil.ContainsFinalizer(sandbox, FinalizerPoolAllocation) { + needRemoveFinalizer = true + filtered := make([]string, 0, len(sandbox.GetFinalizers())) + for _, f := range sandbox.GetFinalizers() { + if f != FinalizerPoolAllocation { + filtered = append(filtered, f) + } + } + sandbox.SetFinalizers(filtered) } } - patch := client.MergeFrom(old) - return syncer.client.Patch(ctx, sandbox, patch) + + meta := map[string]any{ + "annotations": map[string]string{ + AnnoAllocReleasedKey: string(js), + }, + } + if needRemoveFinalizer { + meta["finalizers"] = sandbox.GetFinalizers() + } + patchData, err := json.Marshal(map[string]any{"metadata": meta}) + if err != nil { + return err + } + obj := &sandboxv1alpha1.BatchSandbox{} + obj.Name = sandbox.Name + obj.Namespace = sandbox.Namespace + return syncer.client.Patch(ctx, obj, client.RawPatch(types.MergePatchType, patchData)) } type AllocSpec struct { @@ -425,6 +460,12 @@ func (allocator *defaultAllocator) Schedule(ctx context.Context, spec *AllocSpec return nil, err } + // Build a set of live pool pod names for dead-pod detection during allocation requests. + livePodSet := make(map[string]struct{}, len(spec.Pods)) + for _, p := range spec.Pods { + livePodSet[p.Name] = struct{}{} + } + // Fetch pool allocation once and reuse it for both stale-sandbox cleanup and available-pod filtering. // This avoids a double store read on every reconcile. podAllocation, err := allocator.GetPoolAllocation(ctx, spec.Pool) @@ -438,7 +479,7 @@ func (allocator *defaultAllocator) Schedule(ctx context.Context, spec *AllocSpec // handles them without any special-casing outside this function. 
// Terminating sandboxes are handled inside getSandboxRequest: they receive no new supplement and // all unreleased pods are queued for release. - allRequest, err := allocator.getAllRequest(ctx, spec.Sandboxes, podAllocation) + allRequest, err := allocator.getAllRequest(ctx, spec.Sandboxes, podAllocation, livePodSet) if err != nil { return nil, err } @@ -459,13 +500,13 @@ func (allocator *defaultAllocator) Schedule(ctx context.Context, spec *AllocSpec // orphan entries for pods in podAllocation whose sandbox is no longer in the sandboxes list // (e.g. force-deleted). Orphan entries carry PodSupplement=0 and ToRelease set to the orphan // pods so the normal recycle path handles them without special-casing in the caller. -func (allocator *defaultAllocator) getAllRequest(ctx context.Context, sandboxes []*sandboxv1alpha1.BatchSandbox, podAllocation map[string]string) ([]*algorithm.SandboxRequest, error) { +func (allocator *defaultAllocator) getAllRequest(ctx context.Context, sandboxes []*sandboxv1alpha1.BatchSandbox, podAllocation map[string]string, livePodSet map[string]struct{}) ([]*algorithm.SandboxRequest, error) { log := logf.FromContext(ctx) existingSandboxes := make(map[string]struct{}, len(sandboxes)) allRequest := make([]*algorithm.SandboxRequest, 0, len(sandboxes)) for _, sandbox := range sandboxes { existingSandboxes[sandbox.Name] = struct{}{} - request, err := allocator.getSandboxRequest(ctx, sandbox) + request, err := allocator.getSandboxRequest(ctx, sandbox, livePodSet) if err != nil { return nil, err } @@ -488,7 +529,7 @@ func (allocator *defaultAllocator) getAllRequest(ctx context.Context, sandboxes return allRequest, nil } -func (allocator *defaultAllocator) getSandboxRequest(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox) (*algorithm.SandboxRequest, error) { +func (allocator *defaultAllocator) getSandboxRequest(ctx context.Context, sandbox *sandboxv1alpha1.BatchSandbox, livePodSet map[string]struct{}) (*algorithm.SandboxRequest, error) { 
log := logf.FromContext(ctx) allocated, err := allocator.GetSandboxAllocation(ctx, sandbox) if err != nil { @@ -504,15 +545,37 @@ func (allocator *defaultAllocator) getSandboxRequest(ctx context.Context, sandbo releasedSet[r] = struct{}{} } + // Filter out pods that no longer exist in the pool (e.g. externally deleted). + // Dead pods are treated as released so the sandbox can receive replacement allocations. + liveAllocated := make([]string, 0, len(allocated)) + deadPods := make([]string, 0) + for _, p := range allocated { + if _, exists := releasedSet[p]; exists { + // Already released, keep in allocated for bookkeeping consistency. + liveAllocated = append(liveAllocated, p) + continue + } + if _, alive := livePodSet[p]; alive { + liveAllocated = append(liveAllocated, p) + } else { + deadPods = append(deadPods, p) + } + } + if len(deadPods) > 0 { + log.Info("Detected dead allocated pods, queuing for release to trigger re-allocation", + "sandbox", sandbox.Name, "deadPods", deadPods) + } + // Terminating sandboxes should not receive new allocations. // Queue all unreleased allocated pods for release and set supplement to zero. if !sandbox.DeletionTimestamp.IsZero() { toRelease := make([]string, 0) - for _, p := range allocated { + for _, p := range liveAllocated { if _, ok := releasedSet[p]; !ok { toRelease = append(toRelease, p) } } + toRelease = append(toRelease, deadPods...) if len(toRelease) > 0 { log.Info("Queuing terminating sandbox pods for release", "sandbox", sandbox.Name, "pods", toRelease) } @@ -536,15 +599,19 @@ func (allocator *defaultAllocator) getSandboxRequest(ctx context.Context, sandbo toRelease = append(toRelease, r) } } + // Also queue dead pods for release so their allocation records are cleaned up. + toRelease = append(toRelease, deadPods...) 
replica := int32(0) if sandbox.Spec.Replicas != nil { replica = *sandbox.Spec.Replicas } + // Use liveAllocated count (excluding dead pods) to compute supplement, + // so deleted pods trigger re-allocation from the pool. supplement := int32(0) - if replica-int32(len(allocated)) > 0 { - supplement = replica - int32(len(allocated)) + if replica-int32(len(liveAllocated)) > 0 { + supplement = replica - int32(len(liveAllocated)) } return &algorithm.SandboxRequest{ diff --git a/kubernetes/internal/controller/batchsandbox_controller.go b/kubernetes/internal/controller/batchsandbox_controller.go index 0fe1cf516..63e76c98a 100644 --- a/kubernetes/internal/controller/batchsandbox_controller.go +++ b/kubernetes/internal/controller/batchsandbox_controller.go @@ -66,10 +66,11 @@ type taskScheduleResult struct { // BatchSandboxReconciler reconciles a BatchSandbox object type BatchSandboxReconciler struct { client.Client - Scheme *runtime.Scheme - Recorder record.EventRecorder - ProfileStore *poolassign.ProfileStore - taskSchedulers sync.Map + Scheme *runtime.Scheme + Recorder record.EventRecorder + ProfileStore *poolassign.ProfileStore + taskSchedulers sync.Map + StatusRVExpectation expectations.ResourceVersionExpectation // ResumePullSecret is the K8s Secret name for pulling snapshot images during resume. 
ResumePullSecret string } @@ -90,11 +91,13 @@ type BatchSandboxReconciler struct { // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.21.0/pkg/reconcile -func (r *BatchSandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *BatchSandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, retErr error) { log := logf.FromContext(ctx) + start := time.Now() var aggErrors []error defer func() { _ = DurationStore.Pop(req.String()) + log.Info("Reconcile finished", "duration", time.Since(start).String(), "requeueAfter", result.RequeueAfter.String(), "error", retErr) }() batchSbx := &sandboxv1alpha1.BatchSandbox{} if err := r.Get(ctx, client.ObjectKey{ @@ -192,6 +195,14 @@ func (r *BatchSandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request } runtimeView := buildRuntimeView(batchSbx, pods) + // Ensure PauseObservedGeneration is up-to-date so the status patch ACKs the + // current generation without requiring a dedicated API call. + // Skip during Resuming: a newer generation may carry a queued pause request + // that must remain unacknowledged until resume completes and handlePause runs. + if batchSbx.Status.Phase != sandboxv1alpha1.BatchSandboxPhaseResuming && + runtimeView.status.PauseObservedGeneration < batchSbx.Generation { + runtimeView.status.PauseObservedGeneration = batchSbx.Generation + } if batchSbx.Status.Phase == sandboxv1alpha1.BatchSandboxPhasePaused { r.deleteTaskScheduler(ctx, batchSbx) @@ -210,9 +221,14 @@ func (r *BatchSandboxReconciler) Reconcile(ctx context.Context, req ctrl.Request } } - aggErrors = append(aggErrors, r.persistRuntimeView(ctx, batchSbx, runtimeView)...) + requeue, persistErrors := r.persistRuntimeView(ctx, batchSbx, runtimeView) + aggErrors = append(aggErrors, persistErrors...) - return reconcile.Result{RequeueAfter: DurationStore.Pop(req.String())}, gerrors.Join(aggErrors...) 
+ requeueAfter := DurationStore.Pop(req.String()) + if requeue > 0 && (requeueAfter == 0 || requeue < requeueAfter) { + requeueAfter = requeue + } + return reconcile.Result{RequeueAfter: requeueAfter}, gerrors.Join(aggErrors...) } func calPodIndex(poolStrategy strategy.PoolStrategy, batchSbx *sandboxv1alpha1.BatchSandbox, pods []*corev1.Pod) (map[string]int, error) { diff --git a/kubernetes/internal/controller/batchsandbox_pause_resume.go b/kubernetes/internal/controller/batchsandbox_pause_resume.go index 184f185bd..eb357a036 100644 --- a/kubernetes/internal/controller/batchsandbox_pause_resume.go +++ b/kubernetes/internal/controller/batchsandbox_pause_resume.go @@ -209,10 +209,9 @@ func (r *BatchSandboxReconciler) dispatchPauseResume(ctx context.Context, bs *sa result, err := r.handleResume(ctx, bs) return result, true, err } - log.Info("Dispatch: ACK only", "generation", generation, "pauseObservedGeneration", pauseObservedGen) - if err := r.ackPauseGeneration(ctx, bs); err != nil { - return ctrl.Result{}, true, err - } + // No pause intent — skip the dedicated ACK API call. The normal flow's + // persistRuntimeView will update PauseObservedGeneration in its status patch. 
+ log.Info("Dispatch: no pause intent, deferring ACK to status patch", "generation", generation, "pauseObservedGeneration", pauseObservedGen) return ctrl.Result{}, false, nil } @@ -479,8 +478,9 @@ func (r *BatchSandboxReconciler) completePause(ctx context.Context, bs *sandboxv r.deleteTaskScheduler(ctx, bs) + var latest *sandboxv1alpha1.BatchSandbox if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - latest := &sandboxv1alpha1.BatchSandbox{} + latest = &sandboxv1alpha1.BatchSandbox{} if err := r.Get(ctx, types.NamespacedName{Namespace: bs.Namespace, Name: bs.Name}, latest); err != nil { return err } @@ -496,6 +496,7 @@ func (r *BatchSandboxReconciler) completePause(ctx context.Context, bs *sandboxv }); err != nil { return err } + r.StatusRVExpectation.Expect(latest) return nil } @@ -584,8 +585,9 @@ func (r *BatchSandboxReconciler) continueResume(ctx context.Context, bs *sandbox } func (r *BatchSandboxReconciler) ackPauseGeneration(ctx context.Context, bs *sandboxv1alpha1.BatchSandbox) error { + var latest *sandboxv1alpha1.BatchSandbox if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - latest := &sandboxv1alpha1.BatchSandbox{} + latest = &sandboxv1alpha1.BatchSandbox{} if err := r.Get(ctx, types.NamespacedName{Namespace: bs.Namespace, Name: bs.Name}, latest); err != nil { return err } @@ -595,14 +597,16 @@ func (r *BatchSandboxReconciler) ackPauseGeneration(ctx context.Context, bs *san }); err != nil { return err } + r.StatusRVExpectation.Expect(latest) bs.Status.PauseObservedGeneration = bs.Generation applyBatchSandboxPhaseConditions(&bs.Status) return nil } func (r *BatchSandboxReconciler) ackPauseWithPhase(ctx context.Context, bs *sandboxv1alpha1.BatchSandbox, phase sandboxv1alpha1.BatchSandboxPhase, _ string) error { + var latest *sandboxv1alpha1.BatchSandbox if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - latest := &sandboxv1alpha1.BatchSandbox{} + latest = &sandboxv1alpha1.BatchSandbox{} if err := 
r.Get(ctx, types.NamespacedName{Namespace: bs.Namespace, Name: bs.Name}, latest); err != nil { return err } @@ -613,6 +617,7 @@ func (r *BatchSandboxReconciler) ackPauseWithPhase(ctx context.Context, bs *sand }); err != nil { return err } + r.StatusRVExpectation.Expect(latest) bs.Status.PauseObservedGeneration = bs.Generation bs.Status.Phase = phase applyBatchSandboxPhaseConditions(&bs.Status) @@ -640,8 +645,9 @@ func (r *BatchSandboxReconciler) setCondition( reason string, message string, ) error { - return retry.RetryOnConflict(retry.DefaultBackoff, func() error { - latest := &sandboxv1alpha1.BatchSandbox{} + var latest *sandboxv1alpha1.BatchSandbox + if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + latest = &sandboxv1alpha1.BatchSandbox{} if err := r.Get(ctx, types.NamespacedName{Namespace: bs.Namespace, Name: bs.Name}, latest); err != nil { return err } @@ -674,5 +680,9 @@ func (r *BatchSandboxReconciler) setCondition( latest.Status.Conditions = conditions return r.Status().Update(ctx, latest) - }) + }); err != nil { + return err + } + r.StatusRVExpectation.Expect(latest) + return nil } diff --git a/kubernetes/internal/controller/batchsandbox_pause_resume_test.go b/kubernetes/internal/controller/batchsandbox_pause_resume_test.go index 3688e14ee..29f793002 100644 --- a/kubernetes/internal/controller/batchsandbox_pause_resume_test.go +++ b/kubernetes/internal/controller/batchsandbox_pause_resume_test.go @@ -36,6 +36,7 @@ import ( sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1" taskscheduler "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/scheduler" + "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/expectations" "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex" taskexecutor "github.com/alibaba/OpenSandbox/sandbox-k8s/pkg/task-executor" ) @@ -52,9 +53,10 @@ func newTestReconciler(objs ...client.Object) *BatchSandboxReconciler { WithObjects(objs...). 
Build() return &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } } @@ -222,7 +224,8 @@ func TestDispatchPauseResume_Case2_PauseFalse(t *testing.T) { } func TestDispatchPauseResume_Case3_PauseNil_ACKOnly(t *testing.T) { - // gen > pauseObservedGen, pause=nil → ACK only, continue normal flow (handled=false) + // gen > pauseObservedGen, pause=nil → no dedicated ACK API call, continue normal flow (handled=false). + // The ACK is deferred to persistRuntimeView in the main reconcile loop. bs := &sandboxv1alpha1.BatchSandbox{ ObjectMeta: metav1.ObjectMeta{ Name: "test-bs", @@ -247,11 +250,10 @@ func TestDispatchPauseResume_Case3_PauseNil_ACKOnly(t *testing.T) { assert.False(t, handled, "ACK only should not block normal flow") assert.Equal(t, ctrl.Result{}, result) - // Verify ACK happened + // Verify ACK is NOT written to server by dispatch (deferred to persistRuntimeView). updated := &sandboxv1alpha1.BatchSandbox{} require.NoError(t, r.Get(context.Background(), types.NamespacedName{Namespace: "default", Name: "test-bs"}, updated)) - assert.Equal(t, int64(2), updated.Status.PauseObservedGeneration) - assert.Equal(t, int64(2), bs.Status.PauseObservedGeneration, "in-memory status should also reflect ACK for the rest of this reconcile") + assert.Equal(t, int64(1), updated.Status.PauseObservedGeneration, "server should not be updated by dispatch; ACK is deferred") } func TestDispatchPauseResume_Case4_GenEqual_PauseSet(t *testing.T) { @@ -1034,9 +1036,10 @@ func TestContinueResume_UsesPatchedTemplateWhenCacheReturnsStaleObject(t *testin }). 
Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } result, err := r.continueResume(context.Background(), bs) @@ -1557,8 +1560,7 @@ func TestPersistRuntimeView_PreservesPauseFailedConditionFromLatestStatus(t *tes } r := newTestReconciler(bs, pod) - stale := bs.DeepCopy() - + // Simulate pause handler writing PauseFailed condition to API server. latest := &sandboxv1alpha1.BatchSandbox{} require.NoError(t, r.Get(context.Background(), types.NamespacedName{Namespace: "default", Name: "test-bs"}, latest)) latest.Status.Conditions = append(latest.Status.Conditions, sandboxv1alpha1.BatchSandboxCondition{ @@ -1570,10 +1572,15 @@ func TestPersistRuntimeView_PreservesPauseFailedConditionFromLatestStatus(t *tes }) require.NoError(t, r.Status().Update(context.Background(), latest)) - view := buildRuntimeView(stale, []*corev1.Pod{pod}) - err := r.persistRuntimeView(context.Background(), stale, view) - require.Empty(t, err) + // Simulate second reconcile: informer has caught up, so we read latest state. + freshBS := &sandboxv1alpha1.BatchSandbox{} + require.NoError(t, r.Get(context.Background(), types.NamespacedName{Namespace: "default", Name: "test-bs"}, freshBS)) + + view := buildRuntimeView(freshBS, []*corev1.Pod{pod}) + _, errs := r.persistRuntimeView(context.Background(), freshBS, view) + require.Empty(t, errs) + // Verify PauseFailed is preserved after reconcile with fresh cache. 
updated := &sandboxv1alpha1.BatchSandbox{} require.NoError(t, r.Get(context.Background(), types.NamespacedName{Namespace: "default", Name: "test-bs"}, updated)) @@ -1586,7 +1593,7 @@ func TestPersistRuntimeView_PreservesPauseFailedConditionFromLatestStatus(t *tes assert.Equal(t, "Commit job failed", cond.Message) } } - assert.True(t, foundPauseFailed, "persistRuntimeView should preserve latest PauseFailed condition") + assert.True(t, foundPauseFailed, "persistRuntimeView should preserve PauseFailed condition once informer cache catches up") } func TestPersistRuntimeView_SkipsStatusUpdateWhenRuntimeStatusUnchanged(t *testing.T) { @@ -1655,13 +1662,14 @@ func TestPersistRuntimeView_SkipsStatusUpdateWhenRuntimeStatusUnchanged(t *testi }). Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } view := buildRuntimeView(bs.DeepCopy(), []*corev1.Pod{pod}) - errs := r.persistRuntimeView(context.Background(), bs.DeepCopy(), view) + _, errs := r.persistRuntimeView(context.Background(), bs.DeepCopy(), view) require.Empty(t, errs) assert.Equal(t, 0, statusUpdates, "unchanged runtime status should not be persisted again") } @@ -1709,7 +1717,7 @@ func TestPersistRuntimeView_RetriesSucceededPauseSnapshotCleanup(t *testing.T) { status := bs.Status view := runtimeView{status: &status} - errs := r.persistRuntimeView(context.Background(), bs.DeepCopy(), view) + _, errs := r.persistRuntimeView(context.Background(), bs.DeepCopy(), view) require.Empty(t, errs) stillPresent := &sandboxv1alpha1.SandboxSnapshot{} @@ -2046,9 +2054,10 @@ func TestCompletePause_DeleteFailureLeavesPhasePausing(t *testing.T) { }). 
Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } err := r.completePause(context.Background(), bs) @@ -2174,9 +2183,10 @@ func TestCompletePause_PooledSandboxDoesNotDeleteSourcePod(t *testing.T) { }). Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } err := r.completePause(context.Background(), bs) @@ -2246,9 +2256,10 @@ func TestCompletePause_PooledSandboxAcknowledgesSpecPatchGeneration(t *testing.T }). Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } require.NoError(t, r.completePause(context.Background(), bs)) @@ -2321,9 +2332,10 @@ func TestCompletePause_DoesNotAcknowledgeQueuedResumeGeneration(t *testing.T) { }). Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } require.NoError(t, r.completePause(context.Background(), bs)) @@ -2490,9 +2502,10 @@ func TestSyncPauseOrClear_SnapshotFailedReturnsStatusUpdateError(t *testing.T) { }). 
Build() r := &BatchSandboxReconciler{ - Client: fakeClient, - Scheme: testscheme, - Recorder: record.NewFakeRecorder(10), + Client: fakeClient, + Scheme: testscheme, + Recorder: record.NewFakeRecorder(10), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), } result, err := r.syncPauseOrClear(context.Background(), bs) diff --git a/kubernetes/internal/controller/batchsandbox_status.go b/kubernetes/internal/controller/batchsandbox_status.go index 6e3088f71..0427df935 100644 --- a/kubernetes/internal/controller/batchsandbox_status.go +++ b/kubernetes/internal/controller/batchsandbox_status.go @@ -18,12 +18,12 @@ import ( "context" "encoding/json" "fmt" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/client-go/util/retry" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -233,26 +233,47 @@ func applySteadyRuntimePhase(batchSbx *sandboxv1alpha1.BatchSandbox, status *san status.Phase = sandboxv1alpha1.BatchSandboxPhasePending } +// isInitialUnallocatedSandbox returns true when the sandbox has just been created +// and no pods have been allocated yet. In this case we skip writing the initial +// Pending status — the next reconcile after allocation will write Succeed directly. 
+func isInitialUnallocatedSandbox(batchSbx *sandboxv1alpha1.BatchSandbox, view runtimeView) bool { + return view.status.Replicas == 0 && batchSbx.Status.Phase == "" && + batchSbx.Spec.Replicas != nil && *batchSbx.Spec.Replicas > 0 +} + func (r *BatchSandboxReconciler) persistRuntimeView( ctx context.Context, batchSbx *sandboxv1alpha1.BatchSandbox, view runtimeView, -) []error { +) (time.Duration, []error) { var aggErrors []error log := logf.FromContext(ctx) if err := r.patchBatchSandboxEndpoints(ctx, batchSbx, view.endpointIPs); err != nil { aggErrors = append(aggErrors, err) } - statusChanged := !equality.Semantic.DeepEqual(*view.status, batchSbx.Status) - if statusChanged { - log.Info("To update BatchSandbox status", - "replicas", view.status.Replicas, - "allocated", view.status.Allocated, - "ready", view.status.Ready, - ) - if err := r.updateStatus(batchSbx, view.status); err != nil { + if !equality.Semantic.DeepEqual(*view.status, batchSbx.Status) { + if isInitialUnallocatedSandbox(batchSbx, view) { + return 0, aggErrors + } + // Skip redundant status writes caused by informer cache lag: if we recently + // patched status but the informer hasn't seen the new RV yet, the diff is a + // false positive. Allow a 10s safety valve in case the cache never catches up. + if satisfied, dur := r.StatusRVExpectation.IsSatisfied(batchSbx); !satisfied { + if dur < 10*time.Second { + log.Info("Skipping status update: informer cache is stale", "unsatisfiedDuration", dur.String()) + return time.Second, aggErrors + } + log.Info("Proceeding with status update despite stale cache (timeout exceeded)", "unsatisfiedDuration", dur.String()) + // Fetch the latest object so lifecycle conditions (PauseFailed/ResumeFailed) + // written by pause/resume handlers are not overwritten by the stale cache. 
+ latest := &sandboxv1alpha1.BatchSandbox{} + if err := r.Get(ctx, types.NamespacedName{Namespace: batchSbx.Namespace, Name: batchSbx.Name}, latest); err == nil { + batchSbx = latest + } + } + if err := r.updateStatus(ctx, batchSbx, view.status); err != nil { aggErrors = append(aggErrors, err) - return aggErrors + return 0, aggErrors } } @@ -262,7 +283,7 @@ func (r *BatchSandboxReconciler) persistRuntimeView( aggErrors = append(aggErrors, err) } } - return aggErrors + return 0, aggErrors } func (r *BatchSandboxReconciler) patchBatchSandboxEndpoints(ctx context.Context, batchSbx *sandboxv1alpha1.BatchSandbox, endpointIPs []string) error { @@ -270,7 +291,13 @@ func (r *BatchSandboxReconciler) patchBatchSandboxEndpoints(ctx context.Context, if batchSbx.Annotations[AnnotationSandboxEndpoints] == string(raw) { return nil } - + // Skip writing empty endpoints when annotation doesn't exist yet (e.g. sandbox just created, no pods assigned). + // Still allow clearing endpoints when annotation was previously set (e.g. pause scenario). 
+ _, annotationExists := batchSbx.Annotations[AnnotationSandboxEndpoints] + if !annotationExists && string(raw) == "[]" { + return nil + } + log := logf.FromContext(ctx) patchData, _ := json.Marshal(map[string]any{ "metadata": map[string]any{ "annotations": map[string]string{ @@ -278,21 +305,26 @@ func (r *BatchSandboxReconciler) patchBatchSandboxEndpoints(ctx context.Context, }, }, }) + log.Info("Patching BatchSandbox endpoints", "resourceVersion", batchSbx.ResourceVersion, "patchData", string(patchData)) obj := &sandboxv1alpha1.BatchSandbox{ObjectMeta: metav1.ObjectMeta{Namespace: batchSbx.Namespace, Name: batchSbx.Name}} return r.Patch(ctx, obj, client.RawPatch(types.MergePatchType, patchData)) } -func (r *BatchSandboxReconciler) updateStatus(batchSandbox *sandboxv1alpha1.BatchSandbox, newStatus *sandboxv1alpha1.BatchSandboxStatus) error { - return retry.RetryOnConflict(retry.DefaultBackoff, func() error { - clone := &sandboxv1alpha1.BatchSandbox{} - if err := r.Get(context.TODO(), types.NamespacedName{Namespace: batchSandbox.Namespace, Name: batchSandbox.Name}, clone); err != nil { - return err - } - mergedStatus := newStatus.DeepCopy() - mergedStatus.Conditions = mergeLifecycleConditions(mergedStatus.Conditions, clone.Status.Conditions) - clone.Status = *mergedStatus - return r.Status().Update(context.TODO(), clone) - }) +func (r *BatchSandboxReconciler) updateStatus(ctx context.Context, batchSandbox *sandboxv1alpha1.BatchSandbox, newStatus *sandboxv1alpha1.BatchSandboxStatus) error { + log := logf.FromContext(ctx) + mergedStatus := newStatus.DeepCopy() + mergedStatus.Conditions = mergeLifecycleConditions(mergedStatus.Conditions, batchSandbox.Status.Conditions) + patchData, err := json.Marshal(map[string]any{"status": mergedStatus}) + if err != nil { + return fmt.Errorf("failed to marshal status patch: %w", err) + } + log.Info("Patching BatchSandbox status", "resourceVersion", batchSandbox.ResourceVersion, "phase", mergedStatus.Phase, "patchData", 
string(patchData)) + obj := &sandboxv1alpha1.BatchSandbox{ObjectMeta: metav1.ObjectMeta{Namespace: batchSandbox.Namespace, Name: batchSandbox.Name}} + if err := r.Status().Patch(ctx, obj, client.RawPatch(types.MergePatchType, patchData)); err != nil { + return err + } + r.StatusRVExpectation.Expect(obj) + return nil } func mergeLifecycleConditions( diff --git a/kubernetes/internal/controller/pool_controller.go b/kubernetes/internal/controller/pool_controller.go index f7c6ad4e7..85e3378eb 100644 --- a/kubernetes/internal/controller/pool_controller.go +++ b/kubernetes/internal/controller/pool_controller.go @@ -114,8 +114,12 @@ type PoolReconciler struct { // +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;update;patch // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete -func (r *PoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *PoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, retErr error) { log := logf.FromContext(ctx) + start := time.Now() + defer func() { + log.Info("Reconcile finished", "duration", time.Since(start).String(), "requeueAfter", result.RequeueAfter.String(), "error", retErr) + }() // Fetch the Pool instance pool := &sandboxv1alpha1.Pool{} if err := r.Get(ctx, req.NamespacedName, pool); err != nil { @@ -674,8 +678,14 @@ func (r *PoolReconciler) scalePool(ctx context.Context, pool *sandboxv1alpha1.Po errs := make([]error, 0) pods := args.pods if satisfied, unsatisfiedDuration, dirtyPods := PoolScaleExpectations.SatisfiedExpectations(controllerutils.GetControllerKey(pool)); !satisfied { - log.Info("Pool scale is not ready, requeue", "unsatisfiedDuration", unsatisfiedDuration, "dirtyPods", dirtyPods) - return fmt.Errorf("pool scale is not ready, %v", pool.Name) + if unsatisfiedDuration >= expectations.ExpectationTimeout { + log.Info("Pool scale expectations timed out, clearing stale expectations", + 
"unsatisfiedDuration", unsatisfiedDuration, "dirtyPods", dirtyPods) + PoolScaleExpectations.DeleteExpectations(controllerutils.GetControllerKey(pool)) + } else { + log.Info("Pool scale is not ready, requeue", "unsatisfiedDuration", unsatisfiedDuration, "dirtyPods", dirtyPods) + return fmt.Errorf("pool scale is not ready, %v", pool.Name) + } } schedulableCnt := int32(len(args.pods)) totalPodCnt := args.totalPodCnt diff --git a/kubernetes/internal/controller/sandboxsnapshot_controller.go b/kubernetes/internal/controller/sandboxsnapshot_controller.go index 2c6b6d6f7..c2daaefe1 100644 --- a/kubernetes/internal/controller/sandboxsnapshot_controller.go +++ b/kubernetes/internal/controller/sandboxsnapshot_controller.go @@ -67,6 +67,9 @@ type SandboxSnapshotReconciler struct { // ImageCommitterImage is the image for image-committer (uses nerdctl to commit/push container images) ImageCommitterImage string + // ContainerdSocketPath is containerd socket path for image-committer (nerdctl --address) + ContainerdSocketPath string + // CommitJobTimeout is the timeout for commit jobs (default: 10 minutes) CommitJobTimeout time.Duration @@ -90,8 +93,12 @@ type SandboxSnapshotReconciler struct { // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;update;patch;delete -func (r *SandboxSnapshotReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *SandboxSnapshotReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, retErr error) { log := logf.FromContext(ctx) + start := time.Now() + defer func() { + log.Info("Reconcile finished", "duration", time.Since(start).String(), "requeueAfter", result.RequeueAfter.String(), "error", retErr) + }() snapshot := &sandboxv1alpha1.SandboxSnapshot{} if err := r.Get(ctx, req.NamespacedName, snapshot); err != nil { diff --git a/kubernetes/internal/controller/sandboxsnapshot_lifecycle.go 
b/kubernetes/internal/controller/sandboxsnapshot_lifecycle.go index 26e34ecc2..31e9c6421 100644 --- a/kubernetes/internal/controller/sandboxsnapshot_lifecycle.go +++ b/kubernetes/internal/controller/sandboxsnapshot_lifecycle.go @@ -305,6 +305,13 @@ func (r *SandboxSnapshotReconciler) imageCommitterImage() string { return "image-committer:dev" } +func (r *SandboxSnapshotReconciler) containerdSocketPath() string { + if r.ContainerdSocketPath != "" { + return r.ContainerdSocketPath + } + return ContainerdSocketPath +} + func commitJobSecurityContext() *corev1.SecurityContext { return &corev1.SecurityContext{ RunAsUser: ptrToInt64(0), @@ -326,7 +333,7 @@ func (r *SandboxSnapshotReconciler) buildCommitJob(snapshot *sandboxv1alpha1.San { Name: "containerd-sock", VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{Path: ContainerdSocketPath}, + HostPath: &corev1.HostPathVolumeSource{Path: r.containerdSocketPath()}, }, }, } @@ -462,7 +469,7 @@ func (r *SandboxSnapshotReconciler) buildUnpauseJob(snapshot *sandboxv1alpha1.Sa { Name: "containerd-sock", VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{Path: ContainerdSocketPath}, + HostPath: &corev1.HostPathVolumeSource{Path: r.containerdSocketPath()}, }, }, }, diff --git a/kubernetes/internal/controller/suite_test.go b/kubernetes/internal/controller/suite_test.go index 33459a69e..abdb549af 100644 --- a/kubernetes/internal/controller/suite_test.go +++ b/kubernetes/internal/controller/suite_test.go @@ -34,6 +34,7 @@ import ( . 
"github.com/onsi/gomega" sandboxv1alpha1 "github.com/alibaba/OpenSandbox/sandbox-k8s/apis/sandbox/v1alpha1" + "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/expectations" "github.com/alibaba/OpenSandbox/sandbox-k8s/internal/utils/fieldindex" // +kubebuilder:scaffold:imports ) @@ -93,9 +94,10 @@ var _ = BeforeSuite(func() { By("setup reconciler") Expect((&BatchSandboxReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - Recorder: k8sManager.GetEventRecorderFor("test-batch-sandbox-controller"), + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + Recorder: k8sManager.GetEventRecorderFor("test-batch-sandbox-controller"), + StatusRVExpectation: expectations.NewResourceVersionExpectation(), }).SetupWithManager(k8sManager, 32)).Should(Succeed()) Expect((&PoolReconciler{ Client: k8sManager.GetClient(), diff --git a/kubernetes/test/e2e/e2e_suite_test.go b/kubernetes/test/e2e/e2e_suite_test.go index 41f90827a..a7a7719bf 100644 --- a/kubernetes/test/e2e/e2e_suite_test.go +++ b/kubernetes/test/e2e/e2e_suite_test.go @@ -36,6 +36,13 @@ func TestE2E(t *testing.T) { } var _ = BeforeSuite(func() { + if utils.SkipImageBuild() { + _, _ = fmt.Fprintf(GinkgoWriter, + "E2E_MODE=%s SKIP_IMAGE_BUILD=true: skipping docker build & kind load (images expected to be pre-built)\n", + utils.Mode()) + return + } + dockerBuildArgs := os.Getenv("DOCKER_BUILD_ARGS") By("building the manager(Operator) image") @@ -79,22 +86,22 @@ var _ = BeforeSuite(func() { err = utils.LoadImageToKindClusterWithName(utils.ImageCommitterImage) ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the image-committer image into Kind") - By("pulling the registry:2 image (required for pause/resume tests)") - cmd = exec.Command("docker", "pull", "--platform", "linux/amd64", "registry:2") + By("pulling the registry image (required for pause/resume tests)") + cmd = exec.Command("docker", "pull", "--platform", "linux/amd64", utils.RegistrySourceImage()) _, 
err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to pull registry:2 image") + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to pull registry image") - By("loading the registry:2 image on Kind") - err = utils.LoadImageToKindClusterWithName("registry:2") - ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the registry:2 image into Kind") + By("loading the registry image on Kind") + err = utils.LoadImageToKindClusterWithName(utils.RegistrySourceImage()) + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the registry image into Kind") By("pulling the alpine image (required for commit jobs)") - cmd = exec.Command("docker", "pull", "alpine:latest") + cmd = exec.Command("docker", "pull", utils.AlpineImage()) _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to pull alpine:latest image") + ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to pull alpine image") By("loading the alpine image on Kind") - err = utils.LoadImageToKindClusterWithName("alpine:latest") + err = utils.LoadImageToKindClusterWithName(utils.AlpineImage()) ExpectWithOffset(1, err).NotTo(HaveOccurred(), "Failed to load the alpine image into Kind") }) diff --git a/kubernetes/test/e2e/e2e_test.go b/kubernetes/test/e2e/e2e_test.go index 50bcd3902..4b3ddcfb1 100644 --- a/kubernetes/test/e2e/e2e_test.go +++ b/kubernetes/test/e2e/e2e_test.go @@ -34,7 +34,7 @@ import ( // namespace where the project is deployed in const namespace = "opensandbox-system" -var _ = Describe("Manager", Ordered, func() { +var _ = Describe("Manager", Ordered, Label("Core"), func() { var controllerPodName string // Before running the tests, set up the environment by creating the namespace, @@ -49,11 +49,15 @@ var _ = Describe("Manager", Ordered, func() { Expect(err.Error()).To(ContainSubstring("AlreadyExists"), "Failed to create namespace") } - By("labeling the namespace to enforce the restricted security policy") - cmd = 
exec.Command("kubectl", "label", "--overwrite", "ns", namespace, - "pod-security.kubernetes.io/enforce=restricted") - _, err = utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy") + if psa := utils.PodSecurityEnforce(); psa != "" { + By("labeling the namespace to enforce the " + psa + " security policy") + cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace, + "pod-security.kubernetes.io/enforce="+psa) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with "+psa+" policy") + } else { + By("skipping pod-security label (E2E_POD_SECURITY_ENFORCE is empty)") + } By("installing CRDs") cmd = exec.Command("make", "install") @@ -132,7 +136,7 @@ var _ = Describe("Manager", Ordered, func() { SetDefaultEventuallyTimeout(2 * time.Minute) SetDefaultEventuallyPollingInterval(time.Second) - Context("Manager", func() { + Context("Manager", Label("Manager"), func() { It("should run successfully", func() { By("validating that the controller-manager pod is running as expected") verifyControllerUp := func(g Gomega) { @@ -167,7 +171,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Pool", func() { + Context("Pool", Label("Pool"), func() { BeforeAll(func() { By("waiting for controller to be ready") Eventually(func(g Gomega) { @@ -869,7 +873,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("BatchSandbox", func() { + Context("BatchSandbox", Label("Batch"), func() { BeforeAll(func() { By("waiting for controller to be ready") Eventually(func(g Gomega) { @@ -1391,7 +1395,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Task", func() { + Context("Task", Label("Task"), func() { BeforeAll(func() { By("waiting for controller to be ready") Eventually(func(g Gomega) { @@ -1557,7 +1561,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Pool Update", func() { + Context("Pool Update", Label("Pool"), func() { BeforeAll(func() 
{ By("waiting for controller to be ready") Eventually(func(g Gomega) { @@ -1815,7 +1819,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Pool State Recovery", func() { + Context("Pool State Recovery", Label("Pool"), func() { BeforeAll(func() { By("waiting for controller to be ready") Eventually(func(g Gomega) { @@ -2440,7 +2444,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Pool Recycle", func() { + Context("Pool Recycle", Label("Pool"), func() { BeforeAll(func() { By("waiting for controller to be ready") Eventually(func(g Gomega) { @@ -3084,7 +3088,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Pool Allocator Integrity", func() { + Context("Pool Allocator Integrity", Label("Pool"), func() { const testNamespace = "default" BeforeAll(func() { @@ -4362,7 +4366,7 @@ var _ = Describe("Manager", Ordered, func() { }) }) - Context("Pool Auto-Assign", func() { + Context("Pool Auto-Assign", Label("Pool"), func() { BeforeAll(func() { By("waiting for controller to be ready") Eventually(func(g Gomega) { diff --git a/kubernetes/test/e2e/pause_resume_test.go b/kubernetes/test/e2e/pause_resume_test.go index 4b3996f21..2ea379500 100644 --- a/kubernetes/test/e2e/pause_resume_test.go +++ b/kubernetes/test/e2e/pause_resume_test.go @@ -30,14 +30,14 @@ import ( "github.com/alibaba/OpenSandbox/sandbox-k8s/test/utils" ) -const ( - pauseResumeNamespace = "default" - registryServiceAddr = "docker-registry.default.svc.cluster.local:5000" - registryUsername = "testuser" - registryPassword = "testpass" +var ( + pauseResumeNamespace = utils.PauseResumeNamespace() + registryServiceAddr = utils.PauseResumeRegistryAddr() + registryUsername = utils.PauseResumeRegistryUser() + registryPassword = utils.PauseResumeRegistryPass() ) -var _ = Describe("PauseResume", Ordered, func() { +var _ = Describe("PauseResume", Ordered, Label("PauseResume"), func() { SetDefaultEventuallyTimeout(3 * time.Minute) 
SetDefaultEventuallyPollingInterval(time.Second) @@ -49,19 +49,34 @@ var _ = Describe("PauseResume", Ordered, func() { Expect(err.Error()).To(ContainSubstring("AlreadyExists")) } - By("labeling the namespace to enforce the restricted security policy") - cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace, - "pod-security.kubernetes.io/enforce=restricted") - _, err = utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with restricted policy") + if pauseResumeNamespace != namespace { + By("creating pause-resume namespace") + cmd = exec.Command("kubectl", "create", "ns", pauseResumeNamespace) + _, err = utils.Run(cmd) + if err != nil { + Expect(err.Error()).To(ContainSubstring("AlreadyExists")) + } + } + + if psa := utils.PodSecurityEnforce(); psa != "" { + By("labeling the namespace to enforce the " + psa + " security policy") + cmd = exec.Command("kubectl", "label", "--overwrite", "ns", namespace, + "pod-security.kubernetes.io/enforce="+psa) + _, err = utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "Failed to label namespace with "+psa+" policy") + } else { + By("skipping pod-security label (E2E_POD_SECURITY_ENFORCE is empty)") + } By("installing CRDs") - cmd = exec.Command("kubectl", "apply", "-f", "config/crd/bases") + cmd = exec.Command("make", "install") _, err = utils.Run(cmd) Expect(err).NotTo(HaveOccurred(), "Failed to install CRDs") By("deploying the controller-manager") - cmd = exec.Command("kubectl", "apply", "-k", "config/default") + cmd = exec.Command("make", "deploy", + fmt.Sprintf("CONTROLLER_IMG=%s", utils.ControllerImage), + fmt.Sprintf("SNAPSHOT_REGISTRY=%s", registryServiceAddr)) _, err = utils.Run(cmd) Expect(err).NotTo(HaveOccurred(), "Failed to deploy the controller-manager") @@ -82,7 +97,10 @@ var _ = Describe("PauseResume", Ordered, func() { Expect(err).NotTo(HaveOccurred()) By("deploying Docker Registry") - registryYAML, err := renderTemplate("testdata/registry-deployment.yaml", nil) + 
registryYAML, err := renderTemplate("testdata/registry-deployment.yaml", map[string]interface{}{ + "RegistryImage": utils.RegistrySourceImage(), + "Namespace": pauseResumeNamespace, + }) Expect(err).NotTo(HaveOccurred()) registryFile := filepath.Join("/tmp", "test-registry.yaml") @@ -126,11 +144,11 @@ var _ = Describe("PauseResume", Ordered, func() { utils.Run(cmd) By("undeploying the controller-manager") - cmd = exec.Command("kubectl", "delete", "-k", "config/default", "--ignore-not-found=true") + cmd = exec.Command("make", "undeploy") utils.Run(cmd) By("uninstalling CRDs") - cmd = exec.Command("kubectl", "delete", "-f", "config/crd/bases", "--ignore-not-found=true") + cmd = exec.Command("make", "uninstall") utils.Run(cmd) By("removing manager namespace") diff --git a/kubernetes/test/e2e/testdata/registry-deployment.yaml b/kubernetes/test/e2e/testdata/registry-deployment.yaml index b0f3dc7d7..b97044312 100644 --- a/kubernetes/test/e2e/testdata/registry-deployment.yaml +++ b/kubernetes/test/e2e/testdata/registry-deployment.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: name: docker-registry - namespace: default + namespace: {{ .Namespace }} spec: replicas: 1 selector: @@ -15,7 +15,7 @@ spec: spec: containers: - name: registry - image: registry:2 + image: {{ .RegistryImage }} ports: - containerPort: 5000 env: @@ -41,12 +41,11 @@ apiVersion: v1 kind: Service metadata: name: docker-registry - namespace: default + namespace: {{ .Namespace }} spec: - type: NodePort + type: ClusterIP ports: - port: 5000 targetPort: 5000 - nodePort: 30500 selector: app: docker-registry \ No newline at end of file diff --git a/kubernetes/test/utils/cluster_mode.go b/kubernetes/test/utils/cluster_mode.go new file mode 100644 index 000000000..2a014e6dd --- /dev/null +++ b/kubernetes/test/utils/cluster_mode.go @@ -0,0 +1,117 @@ +// Copyright 2025 Alibaba Group Holding Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import "os" + +const ( + // ModeKind runs e2e against a local Kind cluster (default). + ModeKind = "kind" + // ModeExternal runs e2e against an externally-provided Kubernetes cluster + // using the kubeconfig pointed to by KUBECONFIG. Use this when targeting + // minikube, a shared dev cluster, a CI-provisioned cluster, etc. + ModeExternal = "external" +) + +// Mode returns the e2e cluster mode. Reads E2E_MODE env, defaults to ModeKind. +func Mode() string { + if v := os.Getenv("E2E_MODE"); v != "" { + return v + } + return ModeKind +} + +// IsKind reports whether the current e2e mode is the local Kind cluster. +func IsKind() bool { return Mode() == ModeKind } + +// IsExternal reports whether the current e2e mode targets an +// externally-provided cluster via KUBECONFIG. +func IsExternal() bool { return !IsKind() } + +// SkipImageBuild reports whether the suite should skip the docker-build / +// kind-load steps in BeforeSuite. Non-Kind modes default to true because +// images are expected to be pre-built and pushed to a registry the target +// cluster can pull from. +func SkipImageBuild() bool { + if v := os.Getenv("SKIP_IMAGE_BUILD"); v != "" { + return v == "1" || v == "true" || v == "TRUE" + } + return IsExternal() +} + +// RegistrySourceImage returns the upstream registry image used by the +// in-cluster docker-registry deployment for pause/resume tests. 
Override +// via REGISTRY_SOURCE_IMAGE to point at a mirror reachable from the target +// cluster. +func RegistrySourceImage() string { + if v := os.Getenv("REGISTRY_SOURCE_IMAGE"); v != "" { + return v + } + return "registry:2" +} + +// AlpineImage returns the alpine image used by commit jobs. +func AlpineImage() string { + if v := os.Getenv("ALPINE_IMAGE"); v != "" { + return v + } + return "alpine:latest" +} + +// PauseResumeNamespace returns the namespace used by pause/resume tests for +// the in-cluster docker-registry and registry secrets. +func PauseResumeNamespace() string { + if v := os.Getenv("PAUSE_RESUME_NAMESPACE"); v != "" { + return v + } + return "default" +} + +// PauseResumeRegistryAddr returns the in-cluster docker-registry service +// address used by pause/resume tests. When unset, it derives the host from +// PauseResumeNamespace() so overriding only PAUSE_RESUME_NAMESPACE is +// sufficient and credential auths stay aligned with the registry endpoint. +func PauseResumeRegistryAddr() string { + if v := os.Getenv("PAUSE_RESUME_REGISTRY_ADDR"); v != "" { + return v + } + return "docker-registry." + PauseResumeNamespace() + ".svc.cluster.local:5000" +} + +// PauseResumeRegistryUser returns the registry username for pause/resume tests. +func PauseResumeRegistryUser() string { + if v := os.Getenv("PAUSE_RESUME_REGISTRY_USER"); v != "" { + return v + } + return "testuser" +} + +// PauseResumeRegistryPass returns the registry password for pause/resume tests. +func PauseResumeRegistryPass() string { + if v := os.Getenv("PAUSE_RESUME_REGISTRY_PASS"); v != "" { + return v + } + return "testpass" +} + +// PodSecurityEnforce returns the value applied to the +// `pod-security.kubernetes.io/enforce` namespace label. Empty string means +// the suite must skip applying the label (some platforms reject restricted). 
+func PodSecurityEnforce() string { + if v, ok := os.LookupEnv("E2E_POD_SECURITY_ENFORCE"); ok { + return v + } + return "restricted" +} diff --git a/kubernetes/test/utils/utils.go b/kubernetes/test/utils/utils.go index 25669e35e..b947586ef 100644 --- a/kubernetes/test/utils/utils.go +++ b/kubernetes/test/utils/utils.go @@ -163,8 +163,15 @@ func IsCertManagerCRDsInstalled() bool { return false } -// LoadImageToKindClusterWithName loads a local docker image to the kind cluster +// LoadImageToKindClusterWithName loads a local docker image to the kind cluster. +// When E2E_MODE is not "kind" this is a no-op: the image is expected to live in +// a registry the target cluster can pull from, so there is no Kind node to +// load into. func LoadImageToKindClusterWithName(name string) error { + if IsExternal() { + _, _ = fmt.Fprintf(GinkgoWriter, "skipping kind load for %q (E2E_MODE=%s)\n", name, Mode()) + return nil + } cluster := "kind" if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { cluster = v diff --git a/oseps/0004-secure-container-runtime.md b/oseps/0004-secure-container-runtime.md index 9161fb0cd..eddfadca6 100644 --- a/oseps/0004-secure-container-runtime.md +++ b/oseps/0004-secure-container-runtime.md @@ -180,7 +180,7 @@ Extension to `~/.sandbox.toml`. A single `[secure_runtime]` section configures t ```toml [runtime] type = "docker" # or "kubernetes" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" # Secure container runtime configuration. # When enabled, ALL sandboxes on this server use the specified runtime. 
@@ -210,7 +210,7 @@ Example 1 — gVisor on Docker: # ~/.sandbox.toml [runtime] type = "docker" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" [secure_runtime] type = "gvisor" @@ -224,7 +224,7 @@ Example 2 — Kata Containers (QEMU) on Kubernetes: # ~/.sandbox.toml [runtime] type = "kubernetes" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" [secure_runtime] type = "kata" diff --git a/oseps/0007-fast-sandbox-runtime-support.md b/oseps/0007-fast-sandbox-runtime-support.md index 10db7327e..e11beef78 100644 --- a/oseps/0007-fast-sandbox-runtime-support.md +++ b/oseps/0007-fast-sandbox-runtime-support.md @@ -611,7 +611,7 @@ api_key = "your-secret-key" [runtime] type = "kubernetes" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" [kubernetes] namespace = "default" diff --git a/scripts/bump-component-version.sh b/scripts/bump-component-version.sh index 573fc140c..118262d5f 100755 --- a/scripts/bump-component-version.sh +++ b/scripts/bump-component-version.sh @@ -36,16 +36,17 @@ elif [ $# -eq 2 ]; then COMPONENT="$1" NEW_VERSION="$2" else - echo "Usage: $0 [egress|execd|ingress|code-interpreter] NEW_VERSION" >&2 + echo "Usage: $0 [egress|execd|ingress|code-interpreter|image-committer] NEW_VERSION" >&2 echo " $0 NEW_VERSION # bumps egress" >&2 echo "Example: $0 egress v1.0.2" >&2 echo "Example: $0 execd 1.0.7" >&2 echo "Example: $0 ingress v1.0.6" >&2 + echo "Example: $0 image-committer v0.1.0" >&2 exit 1 fi case "$COMPONENT" in - egress|execd|ingress|code-interpreter) ;; + egress|execd|ingress|code-interpreter|image-committer) ;; *) echo "Error: unsupported component: $COMPONENT" >&2 exit 0 diff --git a/scripts/python-k8s-e2e.sh b/scripts/python-k8s-e2e.sh index bfcb09c42..76801410d 100644 --- a/scripts/python-k8s-e2e.sh +++ b/scripts/python-k8s-e2e.sh @@ -1,5 +1,5 @@ #!/bin/bash -# trigger k8s e2e +# trigger k8s e2e (2026-05-18) # Copyright 2026 Alibaba Group 
Holding Ltd. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/scripts/verify-license.sh b/scripts/verify-license.sh index d2f35b4f1..096d834d2 100755 --- a/scripts/verify-license.sh +++ b/scripts/verify-license.sh @@ -19,6 +19,12 @@ set -euo pipefail +# Print CI diagnostics +echo "License verification started at: $(date -u '+%Y-%m-%dT%H:%M:%SZ')" +echo "Runner: $(hostname) ($(uname -srm))" +echo "User: $(whoami)" +echo "Working directory: $(pwd)" + REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" CURRENT_YEAR="$(date +%Y)" MIN_YEAR="2025" @@ -46,9 +52,6 @@ IGNORED_PATHS=( is_k8s_mock_go() { local file="${1-}" [[ -z "$file" ]] && return 1 - # Skip any Go mocks under kubernetes/internal: - # - filenames ending with _mock.go - # - any file under a /mock/ directory if [[ "$file" != kubernetes/internal/* ]]; then return 1 fi @@ -63,7 +66,6 @@ is_k8s_mock_go() { is_generated_to_skip() { local file="$1" - # Skip common generated files if [[ "$file" == *"deepcopy.go" ]]; then return 0 fi @@ -112,25 +114,20 @@ has_expected_basename() { missing=() while IFS= read -r file; do - # Skip ignored paths if is_ignored "$file"; then continue fi - # Skip kubernetes internal mock go files if is_k8s_mock_go "$file"; then continue fi - # Skip generated files if is_generated_to_skip "$file"; then continue fi - # Only check files with expected extensions or basenames if ! has_expected_extension "$file" && ! has_expected_basename "$file"; then continue fi - # Limit scan to the first 25 lines to allow shebangs/DOCTYPE above the header. header="$(head -n 25 "$file")" if ! echo "$header" | grep -Eq "$LICENSE_REGEX"; then missing+=("$file") diff --git a/sdks/AGENTS.md b/sdks/AGENTS.md index b0acafe30..b021a68cd 100644 --- a/sdks/AGENTS.md +++ b/sdks/AGENTS.md @@ -120,6 +120,7 @@ Always: - Keep package-local validation fast before widening to multi-language verification. - Match public behavior across languages unless a documented platform constraint prevents it. 
- Keep wire-format units and public SDK units separate. Public SDK interfaces should expose time durations as language-native duration types where available (`timedelta`, `Duration`) or otherwise as explicitly second-based fields such as `timeoutSeconds`. +- For Kotlin SDK public APIs intended for Java interoperability, do not expose Kotlin value classes such as `kotlin.time.Duration`; they are JVM-name-mangled and can be inaccessible from Java. Prefer `java.time.Duration` or explicit primitive wire units at the public boundary, with deprecated Kotlin-friendly overloads when needed for migration. Ask first: diff --git a/sdks/package.json b/sdks/package.json index a7732a545..110323d80 100644 --- a/sdks/package.json +++ b/sdks/package.json @@ -17,7 +17,8 @@ "picomatch@^4.0.0": "4.0.4", "brace-expansion@^1.0.0": "1.1.13", "brace-expansion@^2.0.0": "2.0.3", - "flatted@^3.0.0": "3.4.2" + "flatted@^3.0.0": "3.4.2", + "fast-uri@^3.0.0": "3.1.2" } }, "devDependencies": { diff --git a/sdks/pnpm-lock.yaml b/sdks/pnpm-lock.yaml index a9434f93a..43612f5fc 100644 --- a/sdks/pnpm-lock.yaml +++ b/sdks/pnpm-lock.yaml @@ -11,6 +11,7 @@ overrides: brace-expansion@^1.0.0: 1.1.13 brace-expansion@^2.0.0: 2.0.3 flatted@^3.0.0: 3.4.2 + fast-uri@^3.0.0: 3.1.2 importers: @@ -695,8 +696,8 @@ packages: fast-levenshtein@2.0.6: resolution: {integrity: sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==} - fast-uri@3.1.0: - resolution: {integrity: sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==} + fast-uri@3.1.2: + resolution: {integrity: sha512-rVjf7ArG3LTk+FS6Yw81V1DLuZl1bRbNrev6Tmd/9RaroeeRRJhAt7jg/6YFxbvAQXUCavSoZhPPj6oOx+5KjQ==} fdir@6.5.0: resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} @@ -1263,7 +1264,7 @@ snapshots: '@redocly/ajv@8.17.1': dependencies: fast-deep-equal: 3.1.3 - fast-uri: 3.1.0 + fast-uri: 3.1.2 
json-schema-traverse: 1.0.0 require-from-string: 2.0.2 @@ -1650,7 +1651,7 @@ snapshots: fast-levenshtein@2.0.6: {} - fast-uri@3.1.0: {} + fast-uri@3.1.2: {} fdir@6.5.0(picomatch@4.0.4): optionalDependencies: diff --git a/sdks/sandbox/csharp/src/OpenSandbox/Adapters/EgressAdapter.cs b/sdks/sandbox/csharp/src/OpenSandbox/Adapters/EgressAdapter.cs index 05eb00527..dd913783a 100644 --- a/sdks/sandbox/csharp/src/OpenSandbox/Adapters/EgressAdapter.cs +++ b/sdks/sandbox/csharp/src/OpenSandbox/Adapters/EgressAdapter.cs @@ -54,6 +54,13 @@ public async Task PatchRulesAsync( await _client.PatchAsync("/policy", normalizedRules, cancellationToken).ConfigureAwait(false); } + public async Task DeleteRulesAsync( + IReadOnlyList targets, + CancellationToken cancellationToken = default) + { + await _client.DeleteAsync("/policy", targets.ToList(), cancellationToken).ConfigureAwait(false); + } + private static NetworkPolicy ParseNetworkPolicy(JsonElement element) { var policy = new NetworkPolicy(); diff --git a/sdks/sandbox/csharp/src/OpenSandbox/Internal/HttpClientWrapper.cs b/sdks/sandbox/csharp/src/OpenSandbox/Internal/HttpClientWrapper.cs index dbc575598..af6462533 100644 --- a/sdks/sandbox/csharp/src/OpenSandbox/Internal/HttpClientWrapper.cs +++ b/sdks/sandbox/csharp/src/OpenSandbox/Internal/HttpClientWrapper.cs @@ -189,6 +189,23 @@ public async Task DeleteAsync( await EnsureSuccessAsync(response, cancellationToken).ConfigureAwait(false); } + public async Task DeleteAsync( + string path, + object body, + CancellationToken cancellationToken) + { + var url = BuildUrl(path); + _logger.LogDebug("HTTP DELETE {Url}", url); + using var request = new HttpRequestMessage(HttpMethod.Delete, url); + ApplyDefaultHeaders(request); + + var json = JsonSerializer.Serialize(body, JsonOptions); + request.Content = new StringContent(json, Encoding.UTF8, "application/json"); + + using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false); + await 
EnsureSuccessAsync(response, cancellationToken).ConfigureAwait(false); + } + public async Task SendAsync( HttpRequestMessage request, CancellationToken cancellationToken = default) diff --git a/sdks/sandbox/csharp/src/OpenSandbox/Sandbox.cs b/sdks/sandbox/csharp/src/OpenSandbox/Sandbox.cs index 132b26451..a93193f66 100644 --- a/sdks/sandbox/csharp/src/OpenSandbox/Sandbox.cs +++ b/sdks/sandbox/csharp/src/OpenSandbox/Sandbox.cs @@ -593,6 +593,23 @@ public async Task PatchEgressRulesAsync( await _egress.PatchRulesAsync(rules, cancellationToken).ConfigureAwait(false); } + /// + /// Deletes egress rules for this sandbox by target. + /// + /// Each entry is a FQDN or wildcard domain. Matching rules are removed + /// from the currently enforced policy. Targets not present in the policy + /// are silently ignored (idempotent). The current defaultAction is + /// preserved. + /// + /// Target FQDNs or wildcard domains to remove. + /// Cancellation token. + public async Task DeleteEgressRulesAsync( + IReadOnlyList targets, + CancellationToken cancellationToken = default) + { + await _egress.DeleteRulesAsync(targets, cancellationToken).ConfigureAwait(false); + } + /// /// Gets the endpoint for a port. 
/// diff --git a/sdks/sandbox/csharp/src/OpenSandbox/Services/IEgress.cs b/sdks/sandbox/csharp/src/OpenSandbox/Services/IEgress.cs index aaaca49c4..5f8fde0b4 100644 --- a/sdks/sandbox/csharp/src/OpenSandbox/Services/IEgress.cs +++ b/sdks/sandbox/csharp/src/OpenSandbox/Services/IEgress.cs @@ -26,4 +26,8 @@ public interface IEgress Task PatchRulesAsync( IReadOnlyList rules, CancellationToken cancellationToken = default); + + Task DeleteRulesAsync( + IReadOnlyList targets, + CancellationToken cancellationToken = default); } diff --git a/sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxEgressLifecycleTests.cs b/sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxEgressLifecycleTests.cs index 685539a6f..b33ffa24e 100644 --- a/sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxEgressLifecycleTests.cs +++ b/sdks/sandbox/csharp/tests/OpenSandbox.Tests/SandboxEgressLifecycleTests.cs @@ -55,12 +55,15 @@ await sandbox.PatchEgressRulesAsync([new NetworkRule Action = NetworkRuleAction.Allow, Target = "www.github.com" }]); + await sandbox.DeleteEgressRulesAsync(["www.github.com", "*.blocked.org"]); sandboxes.EndpointCalls.Should().Equal(Constants.DefaultExecdPort, Constants.DefaultEgressPort); adapterFactory.EgressStackCallCount.Should().Be(1); adapterFactory.LastEgressBaseUrl.Should().Be($"http://127.0.0.1:{Constants.DefaultEgressPort}"); egress.GetPolicyCallCount.Should().Be(1); egress.PatchRulesCallCount.Should().Be(1); + egress.DeleteRulesCallCount.Should().Be(1); + egress.LastDeleteTargets.Should().Equal("www.github.com", "*.blocked.org"); } [Fact] @@ -300,6 +303,10 @@ private sealed class StubEgress : IEgress public int PatchRulesCallCount { get; private set; } + public int DeleteRulesCallCount { get; private set; } + + public IReadOnlyList LastDeleteTargets { get; private set; } = []; + public Task GetPolicyAsync(CancellationToken cancellationToken = default) { GetPolicyCallCount++; @@ -319,6 +326,13 @@ public Task PatchRulesAsync(IReadOnlyList rules, CancellationToken 
PatchRulesCallCount++; return Task.CompletedTask; } + + public Task DeleteRulesAsync(IReadOnlyList targets, CancellationToken cancellationToken = default) + { + DeleteRulesCallCount++; + LastDeleteTargets = targets.ToList(); + return Task.CompletedTask; + } } private sealed class StubFiles : ISandboxFiles diff --git a/sdks/sandbox/go/config.go b/sdks/sandbox/go/config.go index 6e8718068..67a1bd837 100644 --- a/sdks/sandbox/go/config.go +++ b/sdks/sandbox/go/config.go @@ -177,22 +177,24 @@ func (c *ConnectionConfig) lifecycleClient() *LifecycleClient { return NewLifecycleClient(c.GetBaseURL()+"/"+APIVersion, c.GetAPIKey(), c.clientOpts(true)...) } -// execdClient creates an ExecdClient for a resolved endpoint. -// endpointHeaders are additional headers from the endpoint resolution (e.g. routing headers). -func (c *ConnectionConfig) execdClient(endpointURL, token string, endpointHeaders map[string]string) *ExecdClient { +// execdClient creates an ExecdClient for a resolved endpoint. All headers +// returned by the lifecycle GetEndpoint call (auth tokens, routing hints, +// sticky-session keys, etc.) are forwarded as-is on every subsequent request. +func (c *ConnectionConfig) execdClient(endpointURL string, endpointHeaders map[string]string) *ExecdClient { opts := c.clientOpts(true) if len(endpointHeaders) > 0 { opts = append(opts, WithHeaders(endpointHeaders)) } - return NewExecdClient(endpointURL, token, opts...) + return NewExecdClient(endpointURL, "", opts...) } -// egressClient creates an EgressClient for a resolved endpoint. -// endpointHeaders are additional headers from the endpoint resolution (e.g. routing headers). -func (c *ConnectionConfig) egressClient(endpointURL, token string, endpointHeaders map[string]string) *EgressClient { +// egressClient creates an EgressClient for a resolved endpoint. All headers +// returned by the lifecycle GetEndpoint call are forwarded as-is on every +// subsequent request. 
+func (c *ConnectionConfig) egressClient(endpointURL string, endpointHeaders map[string]string) *EgressClient { opts := c.clientOpts(false) if len(endpointHeaders) > 0 { opts = append(opts, WithHeaders(endpointHeaders)) } - return NewEgressClient(endpointURL, token, opts...) + return NewEgressClient(endpointURL, "", opts...) } diff --git a/sdks/sandbox/go/constants.go b/sdks/sandbox/go/constants.go index de0bc557c..a3f659f8e 100644 --- a/sdks/sandbox/go/constants.go +++ b/sdks/sandbox/go/constants.go @@ -38,6 +38,9 @@ const ( // DefaultCodeInterpreterTimeoutSeconds is the default TTL for code interpreter sandboxes. DefaultCodeInterpreterTimeoutSeconds = 900 + // Version is the SDK version reported in the User-Agent header. + Version = "1.0.1" + // APIVersion is the lifecycle API version prefix. APIVersion = "v1" diff --git a/sdks/sandbox/go/crypto_policy.go b/sdks/sandbox/go/crypto_policy.go index 08ea4e17c..644a871bd 100644 --- a/sdks/sandbox/go/crypto_policy.go +++ b/sdks/sandbox/go/crypto_policy.go @@ -15,7 +15,6 @@ package opensandbox import ( - "crypto/dsa" "crypto/ecdsa" "crypto/ed25519" "crypto/rsa" @@ -25,20 +24,18 @@ import ( ) const ( - nistMinRSABits = 2048 - nistMinDLKeyBits = 224 - nistMinDLGroupBits = 2048 - nistMinECBits = 224 - nistMinHashBits = 224 + nistMinRSABits = 2048 + nistMinECBits = 224 + nistMinHashBits = 224 ) func minHashBitsForSignatureAlgorithm(algo x509.SignatureAlgorithm) (int, error) { switch algo { case x509.MD2WithRSA, x509.MD5WithRSA: return 128, nil - case x509.SHA1WithRSA, x509.DSAWithSHA1, x509.ECDSAWithSHA1: + case x509.SHA1WithRSA, x509.ECDSAWithSHA1: return 160, nil - case x509.DSAWithSHA256, x509.SHA256WithRSA, x509.ECDSAWithSHA256: + case x509.SHA256WithRSA, x509.ECDSAWithSHA256: return 256, nil case x509.SHA384WithRSA, x509.ECDSAWithSHA384: return 384, nil @@ -103,26 +100,6 @@ func ensureCertPublicKeyMeetsNISTMinimums(cert *x509.Certificate) error { nistMinECBits, ) } - case *dsa.PublicKey: - if pub.Parameters.P == nil || 
pub.Parameters.Q == nil { - return fmt.Errorf("certificate DSA public key parameters are incomplete") - } - subgroupBits := pub.Parameters.Q.BitLen() - groupBits := pub.Parameters.P.BitLen() - if subgroupBits < nistMinDLKeyBits { - return fmt.Errorf( - "certificate DSA subgroup (Q) length %d bits is below NIST minimum %d bits", - subgroupBits, - nistMinDLKeyBits, - ) - } - if groupBits < nistMinDLGroupBits { - return fmt.Errorf( - "certificate DSA group (P) length %d bits is below NIST minimum %d bits", - groupBits, - nistMinDLGroupBits, - ) - } case ed25519.PublicKey: bits := len(pub) * 8 if bits < nistMinECBits { diff --git a/sdks/sandbox/go/egress.go b/sdks/sandbox/go/egress.go index eabe14567..d0536ea23 100644 --- a/sdks/sandbox/go/egress.go +++ b/sdks/sandbox/go/egress.go @@ -22,6 +22,9 @@ type EgressClient struct { *Client } +// egressAuthHeader is the authentication header used by the Egress sidecar API. +const egressAuthHeader = "OPENSANDBOX-EGRESS-AUTH" + // NewEgressClient creates a new EgressClient. // baseURL is the sandbox-specific egress sidecar endpoint // (e.g. "http://localhost:18080"). @@ -29,7 +32,7 @@ type EgressClient struct { // if the sidecar does not require authentication. func NewEgressClient(baseURL, authToken string, opts ...Option) *EgressClient { return &EgressClient{ - Client: NewClient(baseURL, authToken, "OPENSANDBOX-EGRESS-AUTH", opts...), + Client: NewClient(baseURL, authToken, egressAuthHeader, opts...), } } @@ -52,3 +55,15 @@ func (c *EgressClient) PatchPolicy(ctx context.Context, rules []NetworkRule) (*P } return &resp, nil } + +// DeletePolicy removes egress rules matching the given targets from the current +// policy. Each target is a FQDN or wildcard domain. Targets not present in the +// policy are silently ignored (idempotent). The current defaultAction is +// preserved. 
+func (c *EgressClient) DeletePolicy(ctx context.Context, targets []string) (*PolicyStatusResponse, error) { + var resp PolicyStatusResponse + if err := c.doRequest(ctx, "DELETE", "/policy", targets, &resp); err != nil { + return nil, err + } + return &resp, nil +} diff --git a/sdks/sandbox/go/execd.go b/sdks/sandbox/go/execd.go index b085a562e..82378e5ea 100644 --- a/sdks/sandbox/go/execd.go +++ b/sdks/sandbox/go/execd.go @@ -51,7 +51,9 @@ func (e *ExecdClient) Ping(ctx context.Context) error { // ListContexts returns all active code execution contexts for the given language. func (e *ExecdClient) ListContexts(ctx context.Context, language string) ([]CodeContext, error) { var result []CodeContext - path := "/code/contexts?language=" + url.QueryEscape(language) + params := url.Values{} + params.Set("language", language) + path := "/code/contexts?" + params.Encode() err := e.client.doRequest(ctx, http.MethodGet, path, nil, &result) return result, err } @@ -85,7 +87,9 @@ func (e *ExecdClient) DeleteContext(ctx context.Context, contextID string) error // DeleteContextsByLanguage deletes all code execution contexts for the given language. func (e *ExecdClient) DeleteContextsByLanguage(ctx context.Context, language string) error { - path := "/code/contexts?language=" + url.QueryEscape(language) + params := url.Values{} + params.Set("language", language) + path := "/code/contexts?" + params.Encode() return e.client.doRequest(ctx, http.MethodDelete, path, nil, nil) } @@ -97,7 +101,9 @@ func (e *ExecdClient) ExecuteCode(ctx context.Context, req RunCodeRequest, handl // InterruptCode interrupts the currently running code execution. func (e *ExecdClient) InterruptCode(ctx context.Context, sessionID string) error { - path := "/code?id=" + url.QueryEscape(sessionID) + params := url.Values{} + params.Set("id", sessionID) + path := "/code?" 
+ params.Encode() return e.client.doRequest(ctx, http.MethodDelete, path, nil, nil) } @@ -131,7 +137,9 @@ func (e *ExecdClient) RunCommand(ctx context.Context, req RunCommandRequest, han // InterruptCommand interrupts the currently running command execution. func (e *ExecdClient) InterruptCommand(ctx context.Context, sessionID string) error { - path := "/command?id=" + url.QueryEscape(sessionID) + params := url.Values{} + params.Set("id", sessionID) + path := "/command?" + params.Encode() return e.client.doRequest(ctx, http.MethodDelete, path, nil, nil) } @@ -161,6 +169,7 @@ func (e *ExecdClient) GetCommandLogs(ctx context.Context, commandID string, curs if err != nil { return fmt.Errorf("opensandbox: create request: %w", err) } + req.Header.Set("User-Agent", "OpenSandbox-Go-SDK/"+Version) for k, v := range e.client.headers { req.Header.Set(k, v) } @@ -206,7 +215,9 @@ func (e *ExecdClient) GetCommandLogs(ctx context.Context, commandID string, curs // GetFileInfo retrieves metadata for the file at the given path. func (e *ExecdClient) GetFileInfo(ctx context.Context, path string) (map[string]FileInfo, error) { var result map[string]FileInfo - reqPath := "/files/info?path=" + url.QueryEscape(path) + params := url.Values{} + params.Set("path", path) + reqPath := "/files/info?" + params.Encode() err := e.client.doRequest(ctx, http.MethodGet, reqPath, nil, &result) return result, err } @@ -274,6 +285,7 @@ func (e *ExecdClient) UploadFiles(ctx context.Context, entries []UploadFileEntry } defer bodyCloser.Close() + req.Header.Set("User-Agent", "OpenSandbox-Go-SDK/"+Version) for k, v := range e.client.headers { req.Header.Set(k, v) } @@ -363,7 +375,9 @@ func (e *ExecdClient) newUploadFilesRequest(ctx context.Context, entries []Uploa // returned io.ReadCloser. Pass rangeHeader (e.g. "bytes=0-1023") for partial // content, or empty string for the full file. 
func (e *ExecdClient) DownloadFile(ctx context.Context, remotePath string, rangeHeader string) (io.ReadCloser, error) { - reqPath := "/files/download?path=" + url.QueryEscape(remotePath) + params := url.Values{} + params.Set("path", remotePath) + reqPath := "/files/download?" + params.Encode() var resp *http.Response err := e.client.withRetry(ctx, func() error { @@ -371,6 +385,7 @@ func (e *ExecdClient) DownloadFile(ctx context.Context, remotePath string, range if err != nil { return fmt.Errorf("opensandbox: create request: %w", err) } + req.Header.Set("User-Agent", "OpenSandbox-Go-SDK/"+Version) for k, v := range e.client.headers { req.Header.Set(k, v) } @@ -418,7 +433,9 @@ func OctalMode(m os.FileMode) int { // DeleteDirectory deletes a directory and all its contents recursively. func (e *ExecdClient) DeleteDirectory(ctx context.Context, path string) error { - reqPath := "/directories?path=" + url.QueryEscape(path) + params := url.Values{} + params.Set("path", path) + reqPath := "/directories?" 
+ params.Encode() return e.client.doRequest(ctx, http.MethodDelete, reqPath, nil, nil) } diff --git a/sdks/sandbox/go/go.mod b/sdks/sandbox/go/go.mod index 102c1d54f..c17718eac 100644 --- a/sdks/sandbox/go/go.mod +++ b/sdks/sandbox/go/go.mod @@ -1,11 +1,3 @@ module github.com/alibaba/OpenSandbox/sdks/sandbox/go go 1.20 - -require github.com/oapi-codegen/runtime v1.2.0 - -require ( - github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/stretchr/testify v1.11.1 // indirect -) diff --git a/sdks/sandbox/go/go.sum b/sdks/sandbox/go/go.sum index 0659558c5..e69de29bb 100644 --- a/sdks/sandbox/go/go.sum +++ b/sdks/sandbox/go/go.sum @@ -1,19 +0,0 @@ -github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= -github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= -github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= -github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= -github.com/oapi-codegen/runtime v1.2.0 h1:RvKc1CVS1QeKSNzO97FBQbSMZyQ8s6rZd+LpmzwHMP4= -github.com/oapi-codegen/runtime v1.2.0/go.mod h1:Y7ZhmmlE8ikZOmuHRRndiIm7nf3xcVv+YMweKgG1DT0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod 
h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= -github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/sdks/sandbox/go/http.go b/sdks/sandbox/go/http.go index ca09e6eba..8ebe8bbec 100644 --- a/sdks/sandbox/go/http.go +++ b/sdks/sandbox/go/http.go @@ -143,6 +143,7 @@ func (c *Client) doRequestOnce(ctx context.Context, method, path string, body an return fmt.Errorf("opensandbox: create request: %w", err) } + req.Header.Set("User-Agent", "OpenSandbox-Go-SDK/"+Version) for k, v := range c.headers { req.Header.Set(k, v) } @@ -166,12 +167,14 @@ func (c *Client) doRequestOnce(ctx context.Context, method, path string, body an // No content (e.g. 204) if resp.StatusCode == http.StatusNoContent || result == nil { + io.Copy(io.Discard, resp.Body) return nil } if err := json.NewDecoder(resp.Body).Decode(result); err != nil { return fmt.Errorf("opensandbox: decode response: %w", err) } + io.Copy(io.Discard, resp.Body) return nil } @@ -197,6 +200,7 @@ func (c *Client) doStreamRequest(ctx context.Context, method, path string, body return fmt.Errorf("opensandbox: create request: %w", err) } + req.Header.Set("User-Agent", "OpenSandbox-Go-SDK/"+Version) for k, v := range c.headers { req.Header.Set(k, v) } diff --git a/sdks/sandbox/go/manager.go b/sdks/sandbox/go/manager.go index 25ed9399a..e7c58acd4 100644 --- a/sdks/sandbox/go/manager.go +++ b/sdks/sandbox/go/manager.go @@ -88,4 +88,4 @@ func (m *SandboxManager) DeleteSnapshot(ctx context.Context, snapshotID string) } // Close releases local resources. Currently a no-op placeholder. 
-func (m *SandboxManager) Close() {} +func (m *SandboxManager) Close() error { return nil } diff --git a/sdks/sandbox/go/opensandbox_test.go b/sdks/sandbox/go/opensandbox_test.go index ffec48ff7..7ae3cf71f 100644 --- a/sdks/sandbox/go/opensandbox_test.go +++ b/sdks/sandbox/go/opensandbox_test.go @@ -263,6 +263,68 @@ func TestCreateSandbox_FromSnapshot(t *testing.T) { require.NoErrorf(t, err, "CreateSandbox from snapshot") } +func TestCreateSandbox_Platform(t *testing.T) { + _, client := newLifecycleServer(t, func(w http.ResponseWriter, r *http.Request) { + var req CreateSandboxRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + assert.Fail(t, fmt.Sprintf("decode request: %v", err)) + return + } + require.NotNil(t, req.Platform, "expected Platform to be sent in the request") + require.Equal(t, OSWindows, req.Platform.OS, "Platform.OS") + require.Equal(t, ArchAMD64, req.Platform.Arch, "Platform.Arch") + + jsonResponse(w, http.StatusCreated, SandboxInfo{ + ID: "sbx-windows", + Status: SandboxStatus{State: StatePending}, + Platform: &PlatformSpec{OS: OSWindows, Arch: ArchAMD64}, + CreatedAt: time.Now().UTC().Truncate(time.Second), + }) + }) + + info, err := client.CreateSandbox(context.Background(), CreateSandboxRequest{ + Image: &ImageSpec{URI: "dockurr/windows:latest"}, + Entrypoint: []string{"cmd", "/c", "echo hi"}, + ResourceLimits: ResourceLimits{"cpu": "2", "memory": "4G", "disk": "64G"}, + Platform: &PlatformSpec{OS: OSWindows, Arch: ArchAMD64}, + }) + require.NoErrorf(t, err, "CreateSandbox with Platform") + require.NotNil(t, info.Platform, "response should echo Platform") + require.Equal(t, OSWindows, info.Platform.OS, "echoed Platform.OS") + require.Equal(t, ArchAMD64, info.Platform.Arch, "echoed Platform.Arch") +} + +func TestCreateSandbox_PlatformOmittedWhenNil(t *testing.T) { + _, client := newLifecycleServer(t, func(w http.ResponseWriter, r *http.Request) { + body, err := io.ReadAll(r.Body) + if err != nil { + assert.Fail(t, 
fmt.Sprintf("read request body: %v", err)) + return + } + var raw map[string]json.RawMessage + if err := json.Unmarshal(body, &raw); err != nil { + assert.Fail(t, fmt.Sprintf("unmarshal request body: %v", err)) + return + } + if _, present := raw["platform"]; present { + assert.Fail(t, "platform should be omitted from JSON when nil") + } + + jsonResponse(w, http.StatusCreated, SandboxInfo{ + ID: "sbx-no-platform", + Status: SandboxStatus{State: StatePending}, + CreatedAt: time.Now().UTC().Truncate(time.Second), + }) + }) + + _, err := client.CreateSandbox(context.Background(), CreateSandboxRequest{ + Image: &ImageSpec{URI: "python:3.12"}, + Entrypoint: []string{"/bin/sh"}, + ResourceLimits: ResourceLimits{"cpu": "500m"}, + }) + require.NoErrorf(t, err, "CreateSandbox without Platform") +} + func TestGetSandbox(t *testing.T) { want := SandboxInfo{ ID: "sbx-456", @@ -578,6 +640,47 @@ func TestPatchPolicy(t *testing.T) { require.Len(t, got.Policy.Egress, 2) } +func TestDeletePolicy(t *testing.T) { + want := PolicyStatusResponse{ + Status: "ok", + Mode: "deny_all", + Policy: &NetworkPolicy{ + DefaultAction: "deny", + Egress: []NetworkRule{ + {Action: "allow", Target: "api.example.com"}, + }, + }, + } + + _, client := newEgressServer(t, func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodDelete { + assert.Fail(t, fmt.Sprintf("expected DELETE, got %s", r.Method)) + } + + var targets []string + if err := json.NewDecoder(r.Body).Decode(&targets); err != nil { + assert.Fail(t, fmt.Sprintf("decode body: %v", err)) + } + if len(targets) != 2 { + assert.Fail(t, fmt.Sprintf("expected 2 targets in request, got %d", len(targets))) + } + if targets[0] != "bad.example.com" || targets[1] != "*.blocked.org" { + assert.Fail(t, fmt.Sprintf("unexpected targets: %v", targets)) + } + + jsonResponse(w, http.StatusOK, want) + }) + + got, err := client.DeletePolicy(context.Background(), []string{ + "bad.example.com", + "*.blocked.org", + }) + require.NoErrorf(t, err, 
"DeletePolicy") + require.NotNil(t, got.Policy) + require.Len(t, got.Policy.Egress, 1) + require.Equal(t, "api.example.com", got.Policy.Egress[0].Target) +} + func TestPing(t *testing.T) { _, client := newExecdServer(t, func(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodGet { @@ -1022,6 +1125,92 @@ func TestExecdAuthHeader(t *testing.T) { require.NoErrorf(t, err, "Ping") } +// TestResolveExecdForwardsAllEndpointHeaders verifies that every header +// returned by GetEndpoint (auth tokens, routing hints, sticky-session keys, +// etc.) is forwarded as-is on subsequent execd requests, mirroring the +// Python SDK behavior. +func TestResolveExecdForwardsAllEndpointHeaders(t *testing.T) { + endpointHeaders := map[string]string{ + "X-EXECD-ACCESS-TOKEN": "execd-tok", + "X-Route-Hint": "vip-pool", + "X-Sticky-Session": "sess-abc", + } + + execdSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + for k, want := range endpointHeaders { + if got := r.Header.Get(k); got != want { + assert.Fail(t, fmt.Sprintf("header %s = %q, want %q", k, got, want)) + } + } + w.WriteHeader(http.StatusOK) + })) + defer execdSrv.Close() + + lifecycleSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/endpoints/") { + jsonResponse(w, http.StatusOK, Endpoint{ + Endpoint: execdSrv.URL, + Headers: endpointHeaders, + }) + return + } + w.WriteHeader(http.StatusNotFound) + })) + defer lifecycleSrv.Close() + + config := ConnectionConfig{Domain: lifecycleSrv.URL} + sb := &Sandbox{ + id: "sbx-headers", + config: &config, + lifecycle: config.lifecycleClient(), + } + + require.NoErrorf(t, sb.resolveExecd(context.Background()), "resolveExecd") + require.NoErrorf(t, sb.execd.Ping(context.Background()), "Ping") +} + +// TestResolveEgressForwardsAllEndpointHeaders verifies the same forwarding +// behavior for the egress sidecar client. 
+func TestResolveEgressForwardsAllEndpointHeaders(t *testing.T) { + endpointHeaders := map[string]string{ + "OPENSANDBOX-EGRESS-AUTH": "egress-tok", + "X-Route-Hint": "egress-vip", + } + + egressSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + for k, want := range endpointHeaders { + if got := r.Header.Get(k); got != want { + assert.Fail(t, fmt.Sprintf("header %s = %q, want %q", k, got, want)) + } + } + jsonResponse(w, http.StatusOK, PolicyStatusResponse{Status: "ok"}) + })) + defer egressSrv.Close() + + lifecycleSrv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet && strings.Contains(r.URL.Path, "/endpoints/") { + jsonResponse(w, http.StatusOK, Endpoint{ + Endpoint: egressSrv.URL, + Headers: endpointHeaders, + }) + return + } + w.WriteHeader(http.StatusNotFound) + })) + defer lifecycleSrv.Close() + + config := ConnectionConfig{Domain: lifecycleSrv.URL} + sb := &Sandbox{ + id: "sbx-egress-headers", + config: &config, + lifecycle: config.lifecycleClient(), + } + + require.NoErrorf(t, sb.resolveEgress(context.Background()), "resolveEgress") + _, err := sb.egress.GetPolicy(context.Background()) + require.NoErrorf(t, err, "GetPolicy") +} + func TestSandboxManager_ListFilter(t *testing.T) { now := time.Now().UTC().Truncate(time.Second) want := ListSandboxesResponse{ diff --git a/sdks/sandbox/go/retry_test.go b/sdks/sandbox/go/retry_test.go index 677cb3421..ce53ce6f2 100644 --- a/sdks/sandbox/go/retry_test.go +++ b/sdks/sandbox/go/retry_test.go @@ -22,7 +22,6 @@ import ( "crypto/rsa" "crypto/tls" "crypto/x509" - "encoding/json" "fmt" "math/big" "net/http" @@ -637,6 +636,3 @@ func TestRetry_CustomRetryableStatusCodes(t *testing.T) { require.Equal(t, "sbx-500-retried", got.ID) require.Equal(t, int32(2), attempts.Load()) } - -// suppress unused import warning -var _ = json.Marshal diff --git a/sdks/sandbox/go/sandbox.go b/sdks/sandbox/go/sandbox.go index 
5be59ddf7..c57c71c3b 100644 --- a/sdks/sandbox/go/sandbox.go +++ b/sdks/sandbox/go/sandbox.go @@ -66,6 +66,10 @@ type SandboxCreateOptions struct { // Extensions for provider-specific parameters. Extensions map[string]string + // Platform selects the target OS/arch for the sandbox (e.g. {"os": + // "windows", "arch": "amd64"}). When nil the server applies its default. + Platform *PlatformSpec + // SkipHealthCheck skips the WaitUntilReady call after creation. SkipHealthCheck bool @@ -132,6 +136,7 @@ func CreateSandbox(ctx context.Context, config ConnectionConfig, opts SandboxCre NetworkPolicy: opts.NetworkPolicy, Volumes: opts.Volumes, Extensions: opts.Extensions, + Platform: opts.Platform, } if opts.Image != "" { req.Image = &ImageSpec{URI: opts.Image, Auth: opts.ImageAuth} @@ -227,9 +232,10 @@ func (s *Sandbox) Kill(ctx context.Context) error { } // Close releases local HTTP resources. Does NOT terminate the sandbox. -func (s *Sandbox) Close() { +func (s *Sandbox) Close() error { // No-op for now — Go's http.Client doesn't need explicit close. // Placeholder for future transport pooling. + return nil } // Pause pauses the sandbox while preserving its state. @@ -410,23 +416,19 @@ func (s *Sandbox) resolveExecd(ctx context.Context) error { execdURL = s.config.GetProtocol() + "://" + execdURL } - token := "" - var extraHeaders map[string]string - if endpoint.Headers != nil { - token = endpoint.Headers["X-EXECD-ACCESS-TOKEN"] - // Preserve all endpoint headers (e.g. 
routing headers) except the auth token - extraHeaders = make(map[string]string, len(endpoint.Headers)) - for k, v := range endpoint.Headers { - if k != "X-EXECD-ACCESS-TOKEN" { - extraHeaders[k] = v + headers := make(map[string]string, len(endpoint.Headers)+1) + for k, v := range endpoint.Headers { + headers[k] = v + } + if s.config.UseServerProxy { + if _, ok := headers[execdAuthHeader]; !ok { + if apiKey := s.config.GetAPIKey(); apiKey != "" { + headers[execdAuthHeader] = apiKey } } } - if s.config.UseServerProxy && token == "" { - token = s.config.GetAPIKey() - } - s.execd = s.config.execdClient(execdURL, token, extraHeaders) + s.execd = s.config.execdClient(execdURL, headers) return nil } @@ -450,18 +452,18 @@ func (s *Sandbox) resolveEgress(ctx context.Context) error { egressURL = s.config.GetProtocol() + "://" + egressURL } - token := "" - var extraHeaders map[string]string - if endpoint.Headers != nil { - token = endpoint.Headers["OPENSANDBOX-EGRESS-AUTH"] - extraHeaders = make(map[string]string, len(endpoint.Headers)) - for k, v := range endpoint.Headers { - if k != "OPENSANDBOX-EGRESS-AUTH" { - extraHeaders[k] = v + headers := make(map[string]string, len(endpoint.Headers)+1) + for k, v := range endpoint.Headers { + headers[k] = v + } + if s.config.UseServerProxy { + if _, ok := headers[egressAuthHeader]; !ok { + if apiKey := s.config.GetAPIKey(); apiKey != "" { + headers[egressAuthHeader] = apiKey } } } - s.egress = s.config.egressClient(egressURL, token, extraHeaders) + s.egress = s.config.egressClient(egressURL, headers) return nil } diff --git a/sdks/sandbox/go/sandbox_egress.go b/sdks/sandbox/go/sandbox_egress.go index 471293688..baa32403d 100644 --- a/sdks/sandbox/go/sandbox_egress.go +++ b/sdks/sandbox/go/sandbox_egress.go @@ -31,3 +31,12 @@ func (s *Sandbox) PatchEgressRules(ctx context.Context, rules []NetworkRule) (*P } return s.egress.PatchPolicy(ctx, rules) } + +// DeleteEgressRules removes egress rules matching the given targets from the +// 
current egress policy. Targets not present in the policy are silently ignored. +func (s *Sandbox) DeleteEgressRules(ctx context.Context, targets []string) (*PolicyStatusResponse, error) { + if err := s.resolveEgress(ctx); err != nil { + return nil, err + } + return s.egress.DeletePolicy(ctx, targets) +} diff --git a/sdks/sandbox/go/sandbox_test.go b/sdks/sandbox/go/sandbox_test.go new file mode 100644 index 000000000..2af2b1de0 --- /dev/null +++ b/sdks/sandbox/go/sandbox_test.go @@ -0,0 +1,234 @@ +// Copyright 2026 Alibaba Group Holding Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package opensandbox + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func TestSandbox_Close(t *testing.T) { + sb := &Sandbox{id: "sbx-close"} + require.NoError(t, sb.Close(), "Close should return nil") +} + +func TestSandboxManager_Close(t *testing.T) { + mgr := &SandboxManager{} + require.NoError(t, mgr.Close(), "Close should return nil") +} + +func TestSandbox_Kill(t *testing.T) { + var ( + gotMethod string + gotPath string + ) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotPath = r.URL.Path + w.WriteHeader(http.StatusNoContent) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-kill-test", + lifecycle: NewLifecycleClient(srv.URL, "test-key"), + } + + require.NoError(t, sb.Kill(context.Background())) + if gotMethod != http.MethodDelete { + assert.Fail(t, fmt.Sprintf("method = %q, want DELETE", gotMethod)) + } + if gotPath != "/sandboxes/sbx-kill-test" { + assert.Fail(t, fmt.Sprintf("path = %q, want /sandboxes/sbx-kill-test", gotPath)) + } +} + +func TestSandbox_GetInfo(t *testing.T) { + want := SandboxInfo{ + ID: "sbx-info", + Status: SandboxStatus{State: StateRunning}, + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + jsonResponse(w, http.StatusOK, want) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-info", + lifecycle: NewLifecycleClient(srv.URL, "test-key"), + } + + got, err := sb.GetInfo(context.Background()) + require.NoErrorf(t, err, "GetInfo") + if got.ID != want.ID { + assert.Fail(t, fmt.Sprintf("ID = %q, want %q", got.ID, want.ID)) + } + if got.Status.State != StateRunning { + assert.Fail(t, fmt.Sprintf("State = %q, want %q", got.Status.State, StateRunning)) + } +} + +func TestSandbox_Ping_ExecdNil(t *testing.T) { + sb := &Sandbox{id: "sbx-no-execd"} + err := sb.Ping(context.Background()) + require.Error(t, err) + if !strings.Contains(err.Error(), "execd client 
not initialized") { + assert.Fail(t, fmt.Sprintf("error = %q, want contains 'execd client not initialized'", err.Error())) + } +} + +func TestSandbox_Ping_OK(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-ping-ok", + execd: NewExecdClient(srv.URL, "tok"), + } + + require.NoError(t, sb.Ping(context.Background())) +} + +func TestSandbox_IsHealthy_ExecdNil(t *testing.T) { + sb := &Sandbox{id: "sbx-no-execd"} + if sb.IsHealthy(context.Background()) { + assert.Fail(t, "IsHealthy should return false when execd is nil") + } +} + +func TestSandbox_IsHealthy_True(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-healthy", + execd: NewExecdClient(srv.URL, "tok"), + } + + if !sb.IsHealthy(context.Background()) { + assert.Fail(t, "IsHealthy should return true when execd /ping succeeds") + } +} + +func TestSandbox_Renew(t *testing.T) { + expiresAt := time.Now().UTC().Add(time.Hour).Truncate(time.Second) + want := RenewExpirationResponse{ + ExpiresAt: expiresAt, + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + assert.Fail(t, fmt.Sprintf("expected POST, got %s", r.Method)) + } + if !strings.HasSuffix(r.URL.Path, "/renew-expiration") { + assert.Fail(t, fmt.Sprintf("expected /renew-expiration suffix in path %s", r.URL.Path)) + } + jsonResponse(w, http.StatusOK, want) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-renew", + lifecycle: NewLifecycleClient(srv.URL, "test-key"), + } + + got, err := sb.Renew(context.Background(), time.Hour) + require.NoErrorf(t, err, "Renew") + if got.ExpiresAt.Truncate(time.Second).Equal(expiresAt) { + return + } + assert.Fail(t, fmt.Sprintf("ExpiresAt = %v, want ~%v", 
got.ExpiresAt, expiresAt)) +} + +func TestSandbox_Pause(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + assert.Fail(t, fmt.Sprintf("expected POST, got %s", r.Method)) + } + if r.URL.Path != "/sandboxes/sbx-pause/pause" { + assert.Fail(t, fmt.Sprintf("path = %q, want /sandboxes/sbx-pause/pause", r.URL.Path)) + } + w.WriteHeader(http.StatusNoContent) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-pause", + lifecycle: NewLifecycleClient(srv.URL, "test-key"), + } + + require.NoError(t, sb.Pause(context.Background())) +} + +func TestSandbox_CreateSnapshot(t *testing.T) { + want := SnapshotInfo{ + ID: "snap-1", + SandboxID: "sbx-snap", + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + assert.Fail(t, fmt.Sprintf("expected POST, got %s", r.Method)) + } + if r.URL.Path != "/sandboxes/sbx-snap/snapshots" { + assert.Fail(t, fmt.Sprintf("path = %q, want /sandboxes/sbx-snap/snapshots", r.URL.Path)) + } + jsonResponse(w, http.StatusCreated, want) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-snap", + lifecycle: NewLifecycleClient(srv.URL, "test-key"), + } + + got, err := sb.CreateSnapshot(context.Background(), CreateSnapshotRequest{}) + require.NoErrorf(t, err, "CreateSnapshot") + if got.ID != "snap-1" { + assert.Fail(t, fmt.Sprintf("ID = %q, want snap-1", got.ID)) + } +} + +func TestSandbox_GetEndpoint(t *testing.T) { + want := Endpoint{ + Endpoint: "https://sbx-test.example.com:8080", + } + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !strings.Contains(r.URL.Path, "/sandboxes/sbx-endpoint/endpoints/8080") { + assert.Fail(t, fmt.Sprintf("expected path containing /sandboxes/sbx-endpoint/endpoints/8080, got %s", r.URL.Path)) + } + jsonResponse(w, http.StatusOK, want) + })) + defer srv.Close() + + sb := &Sandbox{ + id: "sbx-endpoint", 
+ lifecycle: NewLifecycleClient(srv.URL, "test-key"), + config: &ConnectionConfig{}, + } + + got, err := sb.GetEndpoint(context.Background(), 8080) + require.NoErrorf(t, err, "GetEndpoint") + if got.Endpoint != want.Endpoint { + assert.Fail(t, fmt.Sprintf("Endpoint = %q, want %q", got.Endpoint, want.Endpoint)) + } +} diff --git a/sdks/sandbox/go/types.go b/sdks/sandbox/go/types.go index a7f317d10..0b21fde38 100644 --- a/sdks/sandbox/go/types.go +++ b/sdks/sandbox/go/types.go @@ -55,6 +55,39 @@ type ImageAuth struct { Password string `json:"password"` } +// PlatformOS is the target operating system of a sandbox platform constraint. +// The wire-level enum is enforced server-side; the constants below mirror the +// spec so Go callers can avoid stringly-typed typos. +type PlatformOS string + +const ( + OSLinux PlatformOS = "linux" + OSWindows PlatformOS = "windows" +) + +// PlatformArch is the target CPU architecture of a sandbox platform +// constraint. +type PlatformArch string + +const ( + ArchAMD64 PlatformArch = "amd64" + ArchARM64 PlatformArch = "arm64" +) + +// PlatformSpec is a runtime platform constraint used for scheduling and +// provisioning. It is independent from Image and expresses the expected +// target OS and CPU architecture for sandbox execution. +// +// When omitted, the server applies its own default platform selection +// behavior. When provided, the runtime must satisfy the constraint or the +// request fails. +// +// See specs/sandbox-lifecycle.yml#/components/schemas/PlatformSpec. +type PlatformSpec struct { + OS PlatformOS `json:"os"` + Arch PlatformArch `json:"arch"` +} + // ResourceLimits defines runtime resource constraints as key-value pairs. // Common keys: "cpu" (e.g. "500m"), "memory" (e.g. "512Mi"), "gpu" (e.g. "1"). 
type ResourceLimits map[string]string @@ -120,6 +153,7 @@ type CreateSandboxRequest struct { NetworkPolicy *NetworkPolicy `json:"networkPolicy,omitempty"` Volumes []Volume `json:"volumes,omitempty"` Extensions map[string]string `json:"extensions,omitempty"` + Platform *PlatformSpec `json:"platform,omitempty"` } // SandboxInfo represents a runtime execution environment provisioned from a @@ -133,6 +167,7 @@ type SandboxInfo struct { Entrypoint []string `json:"entrypoint"` ExpiresAt *time.Time `json:"expiresAt,omitempty"` CreatedAt time.Time `json:"createdAt"` + Platform *PlatformSpec `json:"platform,omitempty"` } type SnapshotState string diff --git a/sdks/sandbox/javascript/src/adapters/egressAdapter.ts b/sdks/sandbox/javascript/src/adapters/egressAdapter.ts index 93aa90a8f..e2262ddae 100644 --- a/sdks/sandbox/javascript/src/adapters/egressAdapter.ts +++ b/sdks/sandbox/javascript/src/adapters/egressAdapter.ts @@ -22,6 +22,8 @@ type ApiGetPolicyOk = EgressPaths["/policy"]["get"]["responses"][200]["content"]["application/json"]; type ApiPatchRulesRequest = EgressPaths["/policy"]["patch"]["requestBody"]["content"]["application/json"]; +type ApiDeleteRulesRequest = + EgressPaths["/policy"]["delete"]["requestBody"]["content"]["application/json"]; export class EgressAdapter implements Egress { constructor(private readonly client: EgressClient) {} @@ -43,4 +45,12 @@ export class EgressAdapter implements Egress { }); throwOnOpenApiFetchError({ error, response }, "Patch sandbox egress rules failed"); } + + async deleteRules(targets: string[]): Promise { + const body: ApiDeleteRulesRequest = targets; + const { error, response } = await this.client.DELETE("/policy", { + body, + }); + throwOnOpenApiFetchError({ error, response }, "Delete sandbox egress rules failed"); + } } diff --git a/sdks/sandbox/javascript/src/adapters/healthAdapter.ts b/sdks/sandbox/javascript/src/adapters/healthAdapter.ts index a2ffce224..836f46a9b 100644 --- 
a/sdks/sandbox/javascript/src/adapters/healthAdapter.ts +++ b/sdks/sandbox/javascript/src/adapters/healthAdapter.ts @@ -20,7 +20,7 @@ export class HealthAdapter implements ExecdHealth { constructor(private readonly client: ExecdClient) {} async ping(): Promise { - const { error, response } = await this.client.GET("/ping"); + const { error, response } = await this.client.GET("/ping", { parseAs: "text" }); throwOnOpenApiFetchError({ error, response }, "Execd ping failed"); return true; } diff --git a/sdks/sandbox/javascript/src/api/egress.ts b/sdks/sandbox/javascript/src/api/egress.ts index 2cca3230a..934e869d7 100644 --- a/sdks/sandbox/javascript/src/api/egress.ts +++ b/sdks/sandbox/javascript/src/api/egress.ts @@ -54,7 +54,41 @@ export interface paths { }; put?: never; post?: never; - delete?: never; + /** + * Delete egress rules + * @description Remove specific egress rules from the currently enforced policy by target. + * + * - Accepts a list of target strings (FQDNs or wildcard domains). + * - Matching rules are removed; targets not found in the current policy + * are silently ignored (idempotent). + */ + delete: { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + requestBody: { + content: { + "application/json": string[]; + }; + }; + responses: { + /** @description Rules removed successfully. 
*/ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": components["schemas"]["PolicyStatusResponse"]; + }; + }; + 400: components["responses"]["BadRequest"]; + 401: components["responses"]["Unauthorized"]; + 500: components["responses"]["InternalServerError"]; + }; + }; options?: never; head?: never; /** diff --git a/sdks/sandbox/javascript/src/sandbox.ts b/sdks/sandbox/javascript/src/sandbox.ts index 4315f7a44..6ad984322 100644 --- a/sdks/sandbox/javascript/src/sandbox.ts +++ b/sdks/sandbox/javascript/src/sandbox.ts @@ -570,6 +570,10 @@ export class Sandbox { await Sandbox._priv.get(this)!.egress.patchRules(rules); } + async deleteEgressRules(targets: string[]): Promise { + await Sandbox._priv.get(this)!.egress.deleteRules(targets); + } + /** * Get sandbox endpoint for a port (STRICT: no scheme), e.g. "localhost:44772" or "domain/route/.../44772". */ diff --git a/sdks/sandbox/javascript/src/services/egress.ts b/sdks/sandbox/javascript/src/services/egress.ts index 0a248d725..ff9efe3b7 100644 --- a/sdks/sandbox/javascript/src/services/egress.ts +++ b/sdks/sandbox/javascript/src/services/egress.ts @@ -24,4 +24,12 @@ export interface Egress { * the first rule for a target wins. The current defaultAction is preserved. */ patchRules(rules: NetworkRule[]): Promise; + /** + * Delete egress rules by target. + * + * Each entry is a FQDN or wildcard domain. Matching rules are removed from + * the currently enforced policy. Targets not present in the policy are + * silently ignored (idempotent). The current defaultAction is preserved. + */ + deleteRules(targets: string[]): Promise; } diff --git a/sdks/sandbox/javascript/tests/health.test.mjs b/sdks/sandbox/javascript/tests/health.test.mjs new file mode 100644 index 000000000..7f892cdfa --- /dev/null +++ b/sdks/sandbox/javascript/tests/health.test.mjs @@ -0,0 +1,42 @@ +// Copyright 2026 Alibaba Group Holding Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import assert from "node:assert/strict"; +import test from "node:test"; + +import { HealthAdapter, createExecdClient } from "../dist/internal.js"; +import { SandboxApiException } from "../dist/index.js"; + +test("HealthAdapter treats empty 200 ping responses as healthy", async () => { + const health = new HealthAdapter(createExecdClient({ + baseUrl: "http://execd.test", + async fetch(request) { + assert.equal(new URL(request.url).pathname, "/ping"); + return new Response("", { status: 200 }); + }, + })); + + assert.equal(await health.ping(), true); +}); + +test("HealthAdapter still maps ping API errors", async () => { + const health = new HealthAdapter(createExecdClient({ + baseUrl: "http://execd.test", + async fetch() { + return Response.json({ code: "UNAVAILABLE", message: "not ready" }, { status: 503 }); + }, + })); + + await assert.rejects(() => health.ping(), SandboxApiException); +}); diff --git a/sdks/sandbox/kotlin/gradle.properties b/sdks/sandbox/kotlin/gradle.properties index ad3c8fa62..688c7d3be 100644 --- a/sdks/sandbox/kotlin/gradle.properties +++ b/sdks/sandbox/kotlin/gradle.properties @@ -5,5 +5,5 @@ org.gradle.parallel=true # Project metadata project.group=com.alibaba.opensandbox -project.version=1.0.11 +project.version=1.0.12 project.description=A Kotlin SDK for Open Sandbox API diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/Sandbox.kt 
b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/Sandbox.kt index 75cf9b66c..b490015c1 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/Sandbox.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/Sandbox.kt @@ -528,6 +528,19 @@ class Sandbox internal constructor( egressService.patchRules(rules) } + /** + * Deletes egress rules for this sandbox by target. + * + * Each entry is a FQDN or wildcard domain. Matching rules are removed from + * the currently enforced policy. Targets not present in the policy are + * silently ignored (idempotent). The current defaultAction is preserved. + * + * @throws SandboxException if operation fails + */ + fun deleteEgressRules(targets: List) { + egressService.deleteRules(targets) + } + /** * Pauses the sandbox while preserving its state. * diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/config/ConnectionConfig.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/config/ConnectionConfig.kt index b9a7e6019..64888e54e 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/config/ConnectionConfig.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/config/ConnectionConfig.kt @@ -71,7 +71,7 @@ class ConnectionConfig private constructor( private const val ENV_API_KEY = "OPEN_SANDBOX_API_KEY" private const val ENV_DOMAIN = "OPEN_SANDBOX_DOMAIN" - private const val DEFAULT_USER_AGENT = "OpenSandbox-Kotlin-SDK/1.0.11" + private const val DEFAULT_USER_AGENT = "OpenSandbox-Kotlin-SDK/1.0.12" private const val API_VERSION = "v1" @JvmStatic diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt index c57ce63b1..25092de02 100644 
--- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt @@ -180,6 +180,9 @@ data class SandboxError( const val INVALID_ARGUMENT = "INVALID_ARGUMENT" const val UNEXPECTED_RESPONSE = "UNEXPECTED_RESPONSE" + /** The requested file or directory does not exist (server responds with HTTP 404). */ + const val FILE_NOT_FOUND = "FILE_NOT_FOUND" + /** Pool-specific: no idle sandbox and policy is FAIL_FAST. */ const val POOL_EMPTY = "POOL_EMPTY" diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequest.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequest.kt index f080486a6..18b9fbdfd 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequest.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequest.kt @@ -16,7 +16,8 @@ package com.alibaba.opensandbox.sandbox.domain.models.execd.executions -import kotlin.time.Duration +import java.time.Duration +import kotlin.time.toJavaDuration /** * Parameters for command execution. @@ -75,11 +76,19 @@ class RunCommandRequest private constructor( * Maximum execution time; server will terminate the command when reached. * If omitted, the server will not enforce any timeout. 
*/ - fun timeout(timeout: Duration?): Builder { + fun timeout(timeout: Duration): Builder { this.timeout = timeout return this } + @Deprecated( + message = "Use java.time.Duration instead.", + replaceWith = ReplaceWith("timeout(timeout.toJavaDuration())", "kotlin.time.toJavaDuration"), + ) + fun timeout(timeout: kotlin.time.Duration): Builder { + return timeout(timeout.toJavaDuration()) + } + fun uid(uid: Int?): Builder { require(uid == null || uid >= 0) { "Uid must be >= 0" } this.uid = uid diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequest.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequest.kt index fb7dd9ae4..699a0e698 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequest.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequest.kt @@ -16,7 +16,8 @@ package com.alibaba.opensandbox.sandbox.domain.models.execd.executions -import kotlin.time.Duration +import java.time.Duration +import kotlin.time.toJavaDuration /** * Request to run a command in an existing bash session. 
@@ -54,11 +55,19 @@ class RunInSessionRequest private constructor( return this } - fun timeout(timeout: Duration?): Builder { + fun timeout(timeout: Duration): Builder { this.timeout = timeout return this } + @Deprecated( + message = "Use java.time.Duration instead.", + replaceWith = ReplaceWith("timeout(timeout.toJavaDuration())", "kotlin.time.toJavaDuration"), + ) + fun timeout(timeout: kotlin.time.Duration): Builder { + return timeout(timeout.toJavaDuration()) + } + fun handlers(handlers: ExecutionHandlers?): Builder { this.handlers = handlers return this diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Commands.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Commands.kt index fcf8af170..db9b8940e 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Commands.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Commands.kt @@ -21,7 +21,8 @@ import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandSta import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.Execution import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunInSessionRequest -import kotlin.time.Duration +import java.time.Duration +import kotlin.time.toJavaDuration /** * Command execution operations for sandbox environments. @@ -115,14 +116,31 @@ interface Commands { workingDirectory: String? = null, timeout: Duration? 
= null, ): Execution { - return runInSession( - sessionId, + val builder = RunInSessionRequest.builder() .command(command) .workingDirectory(workingDirectory) - .timeout(timeout) - .build(), - ) + if (timeout != null) { + builder.timeout(timeout) + } + return runInSession(sessionId, builder.build()) + } + + @Deprecated( + message = "Use java.time.Duration instead.", + replaceWith = + ReplaceWith( + "runInSession(sessionId, command, workingDirectory, timeout.toJavaDuration())", + "kotlin.time.toJavaDuration", + ), + ) + fun runInSession( + sessionId: String, + command: String, + workingDirectory: String? = null, + timeout: kotlin.time.Duration, + ): Execution { + return runInSession(sessionId, command, workingDirectory, timeout.toJavaDuration()) } /** diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Egress.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Egress.kt index 61aa78e5b..dc0b44b37 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Egress.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/services/Egress.kt @@ -23,4 +23,6 @@ interface Egress { fun getPolicy(): NetworkPolicy fun patchRules(rules: List) + + fun deleteRules(targets: List) } diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExceptionConverter.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExceptionConverter.kt index 5ea52f04b..2c8d287dc 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExceptionConverter.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExceptionConverter.kt @@ -39,6 +39,21 @@ import 
com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ClientException import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ServerError as ExecdServerError import com.alibaba.opensandbox.sandbox.api.execd.infrastructure.ServerException as ExecdServerException +/** + * Returns `true` when this throwable represents an expected "file or directory does not exist" + * outcome rather than a genuine failure. + * + * Detection is intentionally restricted to the explicit [SandboxError.FILE_NOT_FOUND] server + * error code rather than a bare HTTP 404. A 404 whose body cannot be parsed is mapped to + * [SandboxError.UNEXPECTED_RESPONSE] and may indicate a real endpoint/routing/configuration + * regression, which must stay loud (ERROR) instead of being silently downgraded. + * + * Callers (and the adapters themselves) use this to avoid treating a missing file as an error, + * e.g. logging it at ERROR level with a full stack trace, which is just noise for a perfectly + * normal control-flow case such as polling for a not-yet-created file. 
+ */ +fun Throwable.isFileNotFound(): Boolean = this is SandboxApiException && error.code == SandboxError.FILE_NOT_FOUND + fun Exception.toSandboxException(): SandboxException { return when (this) { is SandboxException -> this diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExecutionConverter.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExecutionConverter.kt index 9e64d11ba..e07126ba3 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExecutionConverter.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/converter/ExecutionConverter.kt @@ -18,6 +18,7 @@ package com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.CommandStatus import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest +import java.time.Duration import com.alibaba.opensandbox.sandbox.api.models.execd.CommandStatusResponse as ApiCommandStatusResponse import com.alibaba.opensandbox.sandbox.api.models.execd.RunCommandRequest as ApiRunCommandRequest @@ -27,7 +28,7 @@ object ExecutionConverter { command = command, background = background, cwd = workingDirectory, - timeout = timeout?.inWholeMilliseconds, + timeout = timeout?.toCommandTimeoutMillis(), uid = uid, gid = gid, envs = envs, @@ -46,3 +47,12 @@ object ExecutionConverter { ) } } + +internal fun Duration.toCommandTimeoutMillis(): Long { + require(!isNegative) { "Timeout must be non-negative, got: $this" } + return try { + toMillis() + } catch (e: ArithmeticException) { + throw IllegalArgumentException("Timeout is too large to represent in milliseconds: $this", e) + } +} diff --git 
a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapter.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapter.kt index 76d2b249e..3eb201537 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapter.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapter.kt @@ -42,6 +42,7 @@ import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.Executi import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.ExecutionEventDispatcher import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.jsonParser import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.parseSandboxError +import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toCommandTimeoutMillis import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException import okhttp3.Headers.Companion.toHeaders import okhttp3.HttpUrl.Companion.toHttpUrlOrNull @@ -196,7 +197,7 @@ internal class CommandsAdapter( RunInSessionRequestApi( command = request.command, cwd = request.workingDirectory, - timeout = request.timeout?.inWholeMilliseconds, + timeout = request.timeout?.toCommandTimeoutMillis(), ) val runUrl = execdBaseUrl diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/EgressAdapter.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/EgressAdapter.kt index fc3b6192a..dc4460065 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/EgressAdapter.kt +++ 
b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/EgressAdapter.kt @@ -66,4 +66,13 @@ internal class EgressAdapter( throw e.toSandboxException() } } + + override fun deleteRules(targets: List<String>) { + try { + api.policyDelete(targets) + } catch (e: Exception) { + logger.error("Failed to delete egress rules via endpoint {}", egressEndpoint.endpoint, e) + throw e.toSandboxException() + } + } } diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapter.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapter.kt index 7fa5404d7..cad154073 100644 --- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapter.kt +++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapter.kt @@ -35,6 +35,7 @@ import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.Filesys import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toApiReplaceFileContentMap import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toEntryInfo import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.FilesystemConverter.toEntryInfoMap +import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.isFileNotFound import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.parseSandboxError import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toSandboxException import kotlinx.serialization.json.buildJsonObject @@ -108,7 +109,7 @@ internal class FilesystemAdapter( return response.body?.source()?.readString(charset) ?: "" } } catch (e: Exception) { - logger.error("Failed to read file with encoding $encoding: $path", e) +
logReadFailure("Failed to read file with encoding $encoding: $path", e) throw e.toSandboxException() } } @@ -134,7 +135,7 @@ internal class FilesystemAdapter( return response.body?.bytes() ?: ByteArray(0) } } catch (e: Exception) { - logger.error("Failed to read file as byte array: $path", e) + logReadFailure("Failed to read file as byte array: $path", e) throw e.toSandboxException() } } @@ -167,7 +168,7 @@ internal class FilesystemAdapter( return response.body?.byteStream() ?: throw IllegalStateException("Response body is null") } catch (e: Exception) { - logger.error("Failed to read file as stream: $path", e) + logReadFailure("Failed to read file as stream: $path", e) throw e.toSandboxException() } } @@ -335,6 +336,26 @@ internal class FilesystemAdapter( } } + /** + * Logs a failed read operation, distinguishing genuine failures from the expected + * "file does not exist" case. + * + * A missing file is a normal control-flow outcome (e.g. polling for a not-yet-created + * file), so it is logged at DEBUG level instead of ERROR to avoid flooding callers' + * error logs and monitoring with stack traces for a non-error condition. The exception + * is still propagated to the caller unchanged. 
+ */ + private fun logReadFailure( + message: String, + e: Exception, + ) { + if (e.isFileNotFound()) { + logger.debug(message, e) + } else { + logger.error(message, e) + } + } + private fun getCharsetFromEncoding(encoding: String): Charset { try { return charset(encoding) diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxTest.kt index 9c0041368..295f92e15 100644 --- a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxTest.kt +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxTest.kt @@ -218,6 +218,16 @@ class SandboxTest { verify { egressService.patchRules(rules) } } + @Test + fun `deleteEgressRules should delegate to egressService`() { + val targets = listOf("bad.example.com", "*.blocked.org") + every { egressService.deleteRules(targets) } just Runs + + sandbox.deleteEgressRules(targets) + + verify { egressService.deleteRules(targets) } + } + @Test fun `builder manualCleanup should clear timeout`() { val builder = diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequestTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequestTest.kt new file mode 100644 index 000000000..c3bfcce05 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunCommandRequestTest.kt @@ -0,0 +1,47 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.models.execd.executions + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import java.time.Duration +import kotlin.time.Duration.Companion.seconds + +class RunCommandRequestTest { + @Test + fun `builder accepts java duration for timeout`() { + val request = + RunCommandRequest.builder() + .command("echo hi") + .timeout(Duration.ofSeconds(5)) + .build() + + assertEquals(Duration.ofSeconds(5), request.timeout) + } + + @Suppress("DEPRECATION") + @Test + fun `builder accepts deprecated kotlin duration for timeout`() { + val request = + RunCommandRequest.builder() + .command("echo hi") + .timeout(5.seconds) + .build() + + assertEquals(Duration.ofSeconds(5), request.timeout) + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequestTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequestTest.kt new file mode 100644 index 000000000..6dd02a907 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/domain/models/execd/executions/RunInSessionRequestTest.kt @@ -0,0 +1,47 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.domain.models.execd.executions + +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Test +import java.time.Duration +import kotlin.time.Duration.Companion.seconds + +class RunInSessionRequestTest { + @Test + fun `builder accepts java duration for timeout`() { + val request = + RunInSessionRequest.builder() + .command("echo hi") + .timeout(Duration.ofSeconds(5)) + .build() + + assertEquals(Duration.ofSeconds(5), request.timeout) + } + + @Suppress("DEPRECATION") + @Test + fun `builder accepts deprecated kotlin duration for timeout`() { + val request = + RunInSessionRequest.builder() + .command("echo hi") + .timeout(5.seconds) + .build() + + assertEquals(Duration.ofSeconds(5), request.timeout) + } +} diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapterTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapterTest.kt index 0985dbe3b..87cca06a8 100644 --- a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapterTest.kt +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/CommandsAdapterTest.kt @@ -25,6 +25,7 @@ import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.ExecutionH import com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunCommandRequest import 
com.alibaba.opensandbox.sandbox.domain.models.execd.executions.RunInSessionRequest import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint +import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.toCommandTimeoutMillis import kotlinx.serialization.json.Json import kotlinx.serialization.json.booleanOrNull import kotlinx.serialization.json.intOrNull @@ -39,9 +40,9 @@ import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertThrows +import java.time.Duration import java.util.concurrent.CountDownLatch import java.util.concurrent.TimeUnit -import kotlin.time.Duration.Companion.seconds class CommandsAdapterTest { // CommandsAdapter unit tests @@ -340,7 +341,7 @@ data: {"type":"execution_complete","execution_time":100,"timestamp":167253120100 RunInSessionRequest.builder() .command("echo Hello") .workingDirectory("/workspace") - .timeout(5.seconds) + .timeout(Duration.ofSeconds(5)) .handlers(handlers) .build(), ) @@ -359,6 +360,15 @@ data: {"type":"execution_complete","execution_time":100,"timestamp":167253120100 assertEquals(5000L, requestBodyJson["timeout"]?.jsonPrimitive?.content?.toLong()) } + @Test + fun `command timeout conversion should reject durations too large for milliseconds`() { + val exception = + assertThrows(IllegalArgumentException::class.java) { + Duration.ofSeconds(Long.MAX_VALUE).toCommandTimeoutMillis() + } + assertTrue(exception.message!!.contains("too large to represent in milliseconds")) + } + @Test fun `runInSession should infer non-zero exit code from command error event`() { val initEvent = """data: {"type":"init","text":"cmd-123","timestamp":1672531200000}""" diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapterTest.kt 
b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapterTest.kt new file mode 100644 index 000000000..7bbb95754 --- /dev/null +++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/infrastructure/adapters/service/FilesystemAdapterTest.kt @@ -0,0 +1,142 @@ +/* + * Copyright 2025 Alibaba Group Holding Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.opensandbox.sandbox.infrastructure.adapters.service + +import com.alibaba.opensandbox.sandbox.HttpClientProvider +import com.alibaba.opensandbox.sandbox.config.ConnectionConfig +import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxApiException +import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxError +import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxEndpoint +import com.alibaba.opensandbox.sandbox.infrastructure.adapters.converter.isFileNotFound +import okhttp3.mockwebserver.MockResponse +import okhttp3.mockwebserver.MockWebServer +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assertions.assertTrue +import org.junit.jupiter.api.BeforeEach +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows + +class FilesystemAdapterTest { + private lateinit var mockWebServer: MockWebServer + private 
lateinit var filesystemAdapter: FilesystemAdapter + private lateinit var httpClientProvider: HttpClientProvider + + @BeforeEach + fun setUp() { + mockWebServer = MockWebServer() + mockWebServer.start() + + val host = mockWebServer.hostName + val port = mockWebServer.port + val endpoint = SandboxEndpoint("$host:$port") + + val config = + ConnectionConfig.builder() + .domain("$host:$port") + .protocol("http") + .build() + + httpClientProvider = HttpClientProvider(config) + filesystemAdapter = FilesystemAdapter(httpClientProvider, endpoint) + } + + @AfterEach + fun tearDown() { + mockWebServer.shutdown() + httpClientProvider.close() + } + + @Test + fun `readFile surfaces FILE_NOT_FOUND error code on 404 so callers can distinguish it`() { + mockWebServer.enqueue( + MockResponse() + .setResponseCode(404) + .setBody( + """{"code":"FILE_NOT_FOUND","message":"file not found. open /tmp/missing.txt: no such file or directory"}""", + ), + ) + + val exception = + assertThrows<SandboxApiException> { + filesystemAdapter.readFile("/tmp/missing.txt", "UTF-8", null) + } + + assertEquals(404, exception.statusCode) + assertEquals(SandboxError.FILE_NOT_FOUND, exception.error.code) + // The exception itself is recognised as a "not found" condition, which is what the + // adapter relies on to avoid emitting ERROR-level log noise for an expected outcome. + assertTrue(exception.isFileNotFound()) + } + + @Test + fun `readFile returns content on success`() { + mockWebServer.enqueue( + MockResponse() + .setResponseCode(200) + .setBody("hello world"), + ) + + val content = filesystemAdapter.readFile("/tmp/hello.txt", "UTF-8", null) + + assertEquals("hello world", content) + } + + @Test + fun `isFileNotFound is true for FILE_NOT_FOUND error code`() { + val exception = + SandboxApiException( + message = "Failed to read file.
Status code: 404", + statusCode = 404, + error = SandboxError(SandboxError.FILE_NOT_FOUND), + ) + + assertTrue(exception.isFileNotFound()) + } + + @Test + fun `isFileNotFound is false for other API errors`() { + val exception = + SandboxApiException( + message = "Internal server error", + statusCode = 500, + error = SandboxError(SandboxError.UNEXPECTED_RESPONSE), + ) + + assertFalse(exception.isFileNotFound()) + } + + @Test + fun `isFileNotFound is false for a 404 without an explicit FILE_NOT_FOUND code`() { + // A 404 whose body could not be parsed is mapped to UNEXPECTED_RESPONSE. It may indicate a + // real endpoint/routing regression, so it must NOT be downgraded to a not-found condition. + val exception = + SandboxApiException( + message = "Failed to read file. Status code: 404", + statusCode = 404, + error = SandboxError(SandboxError.UNEXPECTED_RESPONSE), + ) + + assertFalse(exception.isFileNotFound()) + } + + @Test + fun `isFileNotFound is false for non-sandbox exceptions`() { + assertFalse(RuntimeException("boom").isFileNotFound()) + } +} diff --git a/sdks/sandbox/python/src/opensandbox/adapters/egress_adapter.py b/sdks/sandbox/python/src/opensandbox/adapters/egress_adapter.py index 99a00e9d4..7db99867a 100644 --- a/sdks/sandbox/python/src/opensandbox/adapters/egress_adapter.py +++ b/sdks/sandbox/python/src/opensandbox/adapters/egress_adapter.py @@ -110,3 +110,16 @@ async def patch_rules(self, rules: list[NetworkRule]) -> None: except Exception as e: logger.error("Failed to patch egress policy via endpoint %s", self.endpoint.endpoint, exc_info=e) raise ExceptionConverter.to_sandbox_exception(e) from e + + async def delete_rules(self, targets: list[str]) -> None: + try: + from opensandbox.api.egress.api.policy import delete_policy + + response_obj = await delete_policy.asyncio_detailed( + client=self._client, + body=list(targets), + ) + handle_api_error(response_obj, "Delete egress rules") + except Exception as e: + logger.error("Failed to delete egress 
rules via endpoint %s", self.endpoint.endpoint, exc_info=e) + raise ExceptionConverter.to_sandbox_exception(e) from e diff --git a/sdks/sandbox/python/src/opensandbox/api/egress/api/policy/delete_policy.py b/sdks/sandbox/python/src/opensandbox/api/egress/api/policy/delete_policy.py new file mode 100644 index 000000000..cae7bb9a6 --- /dev/null +++ b/sdks/sandbox/python/src/opensandbox/api/egress/api/policy/delete_policy.py @@ -0,0 +1,211 @@ +# +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from http import HTTPStatus +from typing import Any + +import httpx + +from ... 
import errors +from ...client import AuthenticatedClient, Client +from ...models.policy_status_response import PolicyStatusResponse +from ...types import Response + + +def _get_kwargs( + *, + body: list[str], +) -> dict[str, Any]: + headers: dict[str, Any] = {} + + _kwargs: dict[str, Any] = { + "method": "delete", + "url": "/policy", + } + + _kwargs["json"] = body + + headers["Content-Type"] = "application/json" + + _kwargs["headers"] = headers + return _kwargs + + +def _parse_response( + *, client: AuthenticatedClient | Client, response: httpx.Response +) -> PolicyStatusResponse | str | None: + if response.status_code == 200: + response_200 = PolicyStatusResponse.from_dict(response.json()) + + return response_200 + + if response.status_code == 400: + response_400 = response.text + return response_400 + + if response.status_code == 401: + response_401 = response.text + return response_401 + + if response.status_code == 500: + response_500 = response.text + return response_500 + + if client.raise_on_unexpected_status: + raise errors.UnexpectedStatus(response.status_code, response.content) + else: + return None + + +def _build_response( + *, client: AuthenticatedClient | Client, response: httpx.Response +) -> Response[PolicyStatusResponse | str]: + return Response( + status_code=HTTPStatus(response.status_code), + content=response.content, + headers=response.headers, + parsed=_parse_response(client=client, response=response), + ) + + +def sync_detailed( + *, + client: AuthenticatedClient | Client, + body: list[str], +) -> Response[PolicyStatusResponse | str]: + """Delete egress rules + + Remove specific egress rules from the currently enforced policy by target. + + - Accepts a list of target strings (FQDNs or wildcard domains). + - Matching rules are removed; targets not found in the current policy + are silently ignored (idempotent). + + Args: + body (list[str]): Example: ['bad.example.com', '*.blocked.org']. 
+ + Raises: + errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. + httpx.TimeoutException: If the request takes longer than Client.timeout. + + Returns: + Response[PolicyStatusResponse | str] + """ + + kwargs = _get_kwargs( + body=body, + ) + + response = client.get_httpx_client().request( + **kwargs, + ) + + return _build_response(client=client, response=response) + + +def sync( + *, + client: AuthenticatedClient | Client, + body: list[str], +) -> PolicyStatusResponse | str | None: + """Delete egress rules + + Remove specific egress rules from the currently enforced policy by target. + + - Accepts a list of target strings (FQDNs or wildcard domains). + - Matching rules are removed; targets not found in the current policy + are silently ignored (idempotent). + + Args: + body (list[str]): Example: ['bad.example.com', '*.blocked.org']. + + Raises: + errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. + httpx.TimeoutException: If the request takes longer than Client.timeout. + + Returns: + PolicyStatusResponse | str + """ + + return sync_detailed( + client=client, + body=body, + ).parsed + + +async def asyncio_detailed( + *, + client: AuthenticatedClient | Client, + body: list[str], +) -> Response[PolicyStatusResponse | str]: + """Delete egress rules + + Remove specific egress rules from the currently enforced policy by target. + + - Accepts a list of target strings (FQDNs or wildcard domains). + - Matching rules are removed; targets not found in the current policy + are silently ignored (idempotent). + + Args: + body (list[str]): Example: ['bad.example.com', '*.blocked.org']. + + Raises: + errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. + httpx.TimeoutException: If the request takes longer than Client.timeout. 
+ + Returns: + Response[PolicyStatusResponse | str] + """ + + kwargs = _get_kwargs( + body=body, + ) + + response = await client.get_async_httpx_client().request(**kwargs) + + return _build_response(client=client, response=response) + + +async def asyncio( + *, + client: AuthenticatedClient | Client, + body: list[str], +) -> PolicyStatusResponse | str | None: + """Delete egress rules + + Remove specific egress rules from the currently enforced policy by target. + + - Accepts a list of target strings (FQDNs or wildcard domains). + - Matching rules are removed; targets not found in the current policy + are silently ignored (idempotent). + + Args: + body (list[str]): Example: ['bad.example.com', '*.blocked.org']. + + Raises: + errors.UnexpectedStatus: If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. + httpx.TimeoutException: If the request takes longer than Client.timeout. + + Returns: + PolicyStatusResponse | str + """ + + return ( + await asyncio_detailed( + client=client, + body=body, + ) + ).parsed diff --git a/sdks/sandbox/python/src/opensandbox/sandbox.py b/sdks/sandbox/python/src/opensandbox/sandbox.py index ac6b06b24..90e997c77 100644 --- a/sdks/sandbox/python/src/opensandbox/sandbox.py +++ b/sdks/sandbox/python/src/opensandbox/sandbox.py @@ -310,6 +310,17 @@ async def patch_egress_rules(self, rules: list[NetworkRule]) -> None: """ await self._egress_service.patch_rules(rules) + async def delete_egress_rules(self, targets: list[str]) -> None: + """ + Delete egress rules for this sandbox by target. + + Each entry is a FQDN or wildcard domain. Matching rules are removed + from the currently enforced policy. Targets not present in the policy + are silently ignored (idempotent). The current defaultAction is + preserved. + """ + await self._egress_service.delete_rules(targets) + async def pause(self) -> None: """ Pause the sandbox while preserving its state. 
diff --git a/sdks/sandbox/python/src/opensandbox/services/egress.py b/sdks/sandbox/python/src/opensandbox/services/egress.py index 89e8a162f..c863bd40f 100644 --- a/sdks/sandbox/python/src/opensandbox/services/egress.py +++ b/sdks/sandbox/python/src/opensandbox/services/egress.py @@ -50,3 +50,17 @@ async def patch_rules(self, rules: list[NetworkRule]) -> None: SandboxException: if the operation fails """ ... + + async def delete_rules(self, targets: list[str]) -> None: + """ + Delete egress rules by target via the sidecar policy API. + + Each entry is a FQDN or wildcard domain. Matching rules are removed + from the currently enforced policy. Targets not present in the policy + are silently ignored (idempotent). The current defaultAction is + preserved. + + Raises: + SandboxException: if the operation fails + """ + ... diff --git a/sdks/sandbox/python/src/opensandbox/sync/adapters/egress_adapter.py b/sdks/sandbox/python/src/opensandbox/sync/adapters/egress_adapter.py index bf3e48714..56ffc1971 100644 --- a/sdks/sandbox/python/src/opensandbox/sync/adapters/egress_adapter.py +++ b/sdks/sandbox/python/src/opensandbox/sync/adapters/egress_adapter.py @@ -110,3 +110,16 @@ def patch_rules(self, rules: list[NetworkRule]) -> None: except Exception as e: logger.error("Failed to patch egress policy via endpoint %s", self.endpoint.endpoint, exc_info=e) raise ExceptionConverter.to_sandbox_exception(e) from e + + def delete_rules(self, targets: list[str]) -> None: + try: + from opensandbox.api.egress.api.policy import delete_policy + + response_obj = delete_policy.sync_detailed( + client=self._client, + body=list(targets), + ) + handle_api_error(response_obj, "Delete egress rules") + except Exception as e: + logger.error("Failed to delete egress rules via endpoint %s", self.endpoint.endpoint, exc_info=e) + raise ExceptionConverter.to_sandbox_exception(e) from e diff --git a/sdks/sandbox/python/src/opensandbox/sync/sandbox.py b/sdks/sandbox/python/src/opensandbox/sync/sandbox.py 
index c236067ff..83cce5919 100644 --- a/sdks/sandbox/python/src/opensandbox/sync/sandbox.py +++ b/sdks/sandbox/python/src/opensandbox/sync/sandbox.py @@ -318,6 +318,17 @@ def patch_egress_rules(self, rules: list[NetworkRule]) -> None: """ self._egress_service.patch_rules(rules) + def delete_egress_rules(self, targets: list[str]) -> None: + """ + Delete egress rules for this sandbox by target. + + Each entry is a FQDN or wildcard domain. Matching rules are removed + from the currently enforced policy. Targets not present in the policy + are silently ignored (idempotent). The current defaultAction is + preserved. + """ + self._egress_service.delete_rules(targets) + def pause(self) -> None: """ Pause the sandbox while preserving its state. diff --git a/sdks/sandbox/python/src/opensandbox/sync/services/egress.py b/sdks/sandbox/python/src/opensandbox/sync/services/egress.py index c9d2ec730..71fb5977a 100644 --- a/sdks/sandbox/python/src/opensandbox/sync/services/egress.py +++ b/sdks/sandbox/python/src/opensandbox/sync/services/egress.py @@ -38,3 +38,13 @@ def patch_rules(self, rules: list[NetworkRule]) -> None: preserved. """ ... + + def delete_rules(self, targets: list[str]) -> None: + """Delete egress rules by target via the sidecar policy API. + + Each entry is a FQDN or wildcard domain. Matching rules are removed + from the currently enforced policy. Targets not present in the policy + are silently ignored (idempotent). The current defaultAction is + preserved. + """ + ... 
diff --git a/server/DEVELOPMENT.md b/server/DEVELOPMENT.md index cbc9ec891..49e8ab269 100644 --- a/server/DEVELOPMENT.md +++ b/server/DEVELOPMENT.md @@ -61,7 +61,7 @@ This guide provides comprehensive information for developers working on OpenSand [runtime] type = "docker" - execd_image = "opensandbox/execd:v1.0.15" + execd_image = "opensandbox/execd:v1.0.18" [docker] network_mode = "host" diff --git a/server/README.md b/server/README.md index a7c4af441..f764a859a 100644 --- a/server/README.md +++ b/server/README.md @@ -221,11 +221,11 @@ Single source of truth for TOML: **[configuration.md](configuration.md)** (inclu ## Experimental features -Optional **🧪 experimental** behavior; **off by default** in [`example.config.toml`](example.config.toml) (and mirrored copies under `opensandbox_server/examples/`). See release notes before production. +Optional **🧪 experimental** behavior; **off by default** in [`example.config.toml`](opensandbox_server/examples/example.config.toml). See release notes before production. ### Auto-renew on access -Extends sandbox TTL when traffic is observed (lifecycle **proxy** and/or **ingress** + optional **Redis** queue). Design and operations: **[OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md)**. TOML keys (`[renew_intent]`, including nested `redis.*`): see **[configuration.md](configuration.md)** and [`example.config.toml`](example.config.toml). +Extends sandbox TTL when traffic is observed (lifecycle **proxy** and/or **ingress** + optional **Redis** queue). Design and operations: **[OSEP-0009](../oseps/0009-auto-renew-sandbox-on-ingress-access.md)**. TOML keys (`[renew_intent]`, including nested `redis.*`): see **[configuration.md](configuration.md)** and [`example.config.toml`](opensandbox_server/examples/example.config.toml). Per-sandbox: on **create**, set `extensions["access.renew.extend.seconds"]` (string integer **300**–**86400**). 
Clients using the server proxy: request endpoints with `use_server_proxy=true` (REST) or SDK `ConnectionConfig(..., use_server_proxy=True)` — details in OSEP-0009. diff --git a/server/configuration.md b/server/configuration.md index a178eebbf..e2531bcf3 100644 --- a/server/configuration.md +++ b/server/configuration.md @@ -10,10 +10,10 @@ Example files in this repository: | File | Purpose | |------|---------| -| [`example.config.toml`](example.config.toml) | Docker runtime (English) | -| [`example.config.zh.toml`](example.config.zh.toml) | Docker runtime (中文) | -| [`example.config.k8s.toml`](example.config.k8s.toml) | Kubernetes runtime (English) | -| [`example.config.k8s.zh.toml`](example.config.k8s.zh.toml) | Kubernetes runtime (中文) | +| [`example.config.toml`](opensandbox_server/examples/example.config.toml) | Docker runtime (English) | +| [`example.config.zh.toml`](opensandbox_server/examples/example.config.zh.toml) | Docker runtime (中文) | +| [`example.config.k8s.toml`](opensandbox_server/examples/example.config.k8s.toml) | Kubernetes runtime (English) | +| [`example.config.k8s.zh.toml`](opensandbox_server/examples/example.config.k8s.zh.toml) | Kubernetes runtime (中文) | --- @@ -66,6 +66,11 @@ Example files in this repository: | `eip` | string \| omitted | `null` | Public IP or hostname used as the **host part** when the server returns sandbox endpoint URLs (notably Docker runtime). | | `max_sandbox_timeout_seconds` | integer \| omitted | `null` | Upper bound on sandbox TTL in seconds for **create** requests that specify `timeout`. Must be ≥ **60** if set. Omit to disable the server-side cap. | | `timeout_keep_alive` | integer | `30` | Idle keep-alive timeout (seconds) passed to uvicorn. | +| `limit_concurrency` | integer | `1024` | Maximum concurrent connections before returning 503. Provides backpressure protection under burst load. Set to `0` to disable the cap (TOML cannot express `null`). 
| +| `backlog` | integer | `2048` | Socket listen backlog passed to uvicorn. | +| `thread_pool_size` | integer | `200` | Maximum size of the anyio default threadpool used by FastAPI to run sync route handlers. The anyio default of 40 throttles bursts of blocking sandbox list/get/delete operations under high concurrency. | +| `loop` | `"auto"` \| `"uvloop"` \| `"asyncio"` | `"auto"` | Event loop implementation. `auto` prefers uvloop and falls back to asyncio. | +| `http` | `"auto"` \| `"httptools"` \| `"h11"` | `"auto"` | HTTP protocol parser. `auto` prefers httptools and falls back to h11. | --- diff --git a/server/docker-compose.example.yaml b/server/docker-compose.example.yaml index 5b7c792d0..4bf565483 100644 --- a/server/docker-compose.example.yaml +++ b/server/docker-compose.example.yaml @@ -10,12 +10,12 @@ configs: [runtime] type = "docker" - # execd_image = "opensandbox/execd:v1.0.15" - execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15" + # execd_image = "opensandbox/execd:v1.0.18" + execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18" [egress] - image = "opensandbox/egress:v1.0.11" - # image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.11" + image = "opensandbox/egress:v1.0.12" + # image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.12" [docker] network_mode = "bridge" diff --git a/server/opensandbox_server/api/lifecycle.py b/server/opensandbox_server/api/lifecycle.py index 576e0fb42..39bbc62b8 100644 --- a/server/opensandbox_server/api/lifecycle.py +++ b/server/opensandbox_server/api/lifecycle.py @@ -19,7 +19,6 @@ All business logic is delegated to the service layer that backs each operation. 
""" -import asyncio from typing import List, Optional from fastapi import APIRouter, Body, Header, Query, Request, status @@ -111,7 +110,7 @@ async def create_sandbox( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def list_sandboxes( +def list_sandboxes( state: Optional[List[str]] = Query(None, description="Filter by lifecycle state. Pass multiple times for OR logic."), metadata: Optional[str] = Query(None, description="Arbitrary metadata key-value pairs for filtering (URL encoded)."), page: int = Query(1, ge=1, description="Page number for pagination"), @@ -176,7 +175,7 @@ async def list_sandboxes( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def get_sandbox( +def get_sandbox( sandbox_id: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), ) -> Sandbox: @@ -214,7 +213,7 @@ async def get_sandbox( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def patch_sandbox_metadata( +def patch_sandbox_metadata( sandbox_id: str, patch: PatchSandboxMetadataRequest = Body(...), x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), @@ -239,7 +238,7 @@ async def patch_sandbox_metadata( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def delete_sandbox( +def delete_sandbox( sandbox_id: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), ) -> Response: @@ -279,7 +278,7 @@ async def delete_sandbox( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def pause_sandbox( +def pause_sandbox( sandbox_id: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), ) -> Response: @@ 
-316,7 +315,7 @@ async def pause_sandbox( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def resume_sandbox( +def resume_sandbox( sandbox_id: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), ) -> Response: @@ -355,7 +354,7 @@ async def resume_sandbox( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def renew_sandbox_expiration( +def renew_sandbox_expiration( sandbox_id: str, request: RenewSandboxExpirationRequest, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), @@ -402,7 +401,7 @@ async def renew_sandbox_expiration( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def create_snapshot( +def create_snapshot( sandbox_id: str, response: Response, request: Optional[CreateSnapshotRequest] = None, @@ -412,11 +411,7 @@ async def create_snapshot( Create a persistent point-in-time snapshot from a sandbox. """ create_request = request or CreateSnapshotRequest() - snapshot = await asyncio.to_thread( - snapshot_service.create_snapshot, - sandbox_id, - create_request, - ) + snapshot = snapshot_service.create_snapshot(sandbox_id, create_request) response.headers["Location"] = f"/v1/snapshots/{snapshot.id}" return snapshot @@ -433,7 +428,7 @@ async def create_snapshot( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def list_snapshots( +def list_snapshots( sandbox_id: Optional[str] = Query(None, alias="sandboxId", description="Filter snapshots by source sandbox identifier"), state: Optional[List[str]] = Query(None, description="Filter by snapshot lifecycle state. 
Pass multiple times for OR logic."), page: int = Query(1, ge=1, description="Page number for pagination"), @@ -464,7 +459,7 @@ async def list_snapshots( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def get_snapshot( +def get_snapshot( snapshot_id: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), ) -> Snapshot: @@ -488,7 +483,7 @@ async def get_snapshot( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def delete_snapshot( +def delete_snapshot( snapshot_id: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID", description="Unique request identifier for tracing"), ) -> Response: @@ -515,7 +510,7 @@ async def delete_snapshot( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def get_sandbox_endpoint( +def get_sandbox_endpoint( request: Request, sandbox_id: str, port: int, diff --git a/server/opensandbox_server/api/pool.py b/server/opensandbox_server/api/pool.py index 4f33f6bdc..11231bc1e 100644 --- a/server/opensandbox_server/api/pool.py +++ b/server/opensandbox_server/api/pool.py @@ -88,7 +88,7 @@ def _get_pool_service(): 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def create_pool( +def create_pool( request: CreatePoolRequest, x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), ) -> PoolResponse: @@ -121,7 +121,7 @@ async def create_pool( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def list_pools( +def list_pools( x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), ) -> ListPoolsResponse: """ @@ -151,7 +151,7 @@ async def list_pools( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def get_pool( +def get_pool( pool_name: str, x_request_id: Optional[str] = 
Header(None, alias="X-Request-ID"), ) -> PoolResponse: @@ -182,7 +182,7 @@ async def get_pool( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def update_pool( +def update_pool( pool_name: str, request: UpdatePoolRequest, x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), @@ -217,7 +217,7 @@ async def update_pool( 500: {"model": ErrorResponse, "description": "An unexpected server error occurred"}, }, ) -async def delete_pool( +def delete_pool( pool_name: str, x_request_id: Optional[str] = Header(None, alias="X-Request-ID"), ) -> Response: diff --git a/server/opensandbox_server/api/schema.py b/server/opensandbox_server/api/schema.py index a6987c738..f3be288e0 100644 --- a/server/opensandbox_server/api/schema.py +++ b/server/opensandbox_server/api/schema.py @@ -403,10 +403,10 @@ class CreateSandboxRequest(BaseModel): "null timeout when the workload provider does not support non-expiring sandboxes." ), ) - resource_limits: ResourceLimits = Field( - ..., + resource_limits: Optional[ResourceLimits] = Field( + None, alias="resourceLimits", - description="Runtime resource constraints for the sandbox instance", + description="Runtime resource constraints for the sandbox instance. Optional when poolRef is provided.", ) env: Optional[Dict[str, Optional[str]]] = Field( None, @@ -457,6 +457,19 @@ class CreateSandboxRequest(BaseModel): @model_validator(mode="after") def validate_source_and_entrypoint(self) -> "CreateSandboxRequest": + # When poolRef is set, image/snapshotId/entrypoint/resourceLimits are + # all defined in the Pool CRD and not required from the caller. 
+ has_pool_ref = bool((self.extensions or {}).get("poolRef", "").strip()) + if has_pool_ref: + # Reject conflicting fields that would be ignored in pool mode + if bool((self.snapshot_id or "").strip()): + raise ValueError("snapshotId cannot be used together with poolRef.") + # Normalize blank snapshotId so downstream code won't see + # a truthy whitespace string (e.g. " ") as a real value. + if self.snapshot_id is not None and not self.snapshot_id.strip(): + self.snapshot_id = None + return self + has_image = self.image is not None and bool(self.image.uri.strip()) has_snapshot = bool((self.snapshot_id or "").strip()) @@ -472,6 +485,9 @@ def validate_source_and_entrypoint(self) -> "CreateSandboxRequest": if self.snapshot_id is not None and not has_snapshot: self.snapshot_id = None + if self.resource_limits is None: + raise ValueError("resourceLimits is required when poolRef is not provided.") + return self class Config: diff --git a/server/opensandbox_server/cli.py b/server/opensandbox_server/cli.py index 2b029ced0..e2a7a7b5d 100644 --- a/server/opensandbox_server/cli.py +++ b/server/opensandbox_server/cli.py @@ -37,7 +37,9 @@ RuntimeConfig, ServerConfig, StorageConfig, + load_config, ) +from opensandbox_server.logging_config import configure_logging def _strip_optional(annotation: Any) -> Any: @@ -284,15 +286,25 @@ def main() -> None: if args.config: os.environ[CONFIG_ENV_VAR] = args.config - from opensandbox_server import main as server_main # local import after env is set + # Load config + logging without importing opensandbox_server.main: importing + # main eagerly constructs sandbox_service (restoring containers and starting + # expiration timers), which we defer to the actual worker process so the + # uvicorn reloader supervisor does not run them. 
+ app_config = load_config() + log_config = configure_logging(app_config.log) + server_cfg = app_config.server uvicorn.run( "opensandbox_server.main:app", - host=server_main.app_config.server.host, - port=server_main.app_config.server.port, + host=server_cfg.host, + port=server_cfg.port, reload=args.reload, - log_config=server_main._log_config, - timeout_keep_alive=server_main.app_config.server.timeout_keep_alive, + log_config=log_config, + timeout_keep_alive=server_cfg.timeout_keep_alive, + limit_concurrency=server_cfg.limit_concurrency, + backlog=server_cfg.backlog, + loop=server_cfg.loop, + http=server_cfg.http, ) diff --git a/server/opensandbox_server/config.py b/server/opensandbox_server/config.py index 5a1cedda1..369c716d7 100644 --- a/server/opensandbox_server/config.py +++ b/server/opensandbox_server/config.py @@ -453,6 +453,52 @@ class ServerConfig(BaseModel): "Connections idle longer than this may be closed by the server." ), ) + limit_concurrency: Optional[int] = Field( + default=1024, + ge=0, + description=( + "Maximum concurrent connections before returning 503. " + "Set to 0 to disable (TOML cannot express null). " + "Provides backpressure protection under burst load." + ), + ) + + @field_validator("limit_concurrency", mode="after") + @classmethod + def _zero_disables_limit_concurrency(cls, value: Optional[int]) -> Optional[int]: + # Translate the TOML-friendly sentinel 0 into None so uvicorn applies + # no concurrency cap. TOML has no null literal, so 0 is the only way + # to disable the limit from the config file. + return None if value == 0 else value + backlog: int = Field( + default=2048, + ge=1, + description="Socket listen backlog passed to uvicorn.", + ) + thread_pool_size: int = Field( + default=200, + ge=1, + description=( + "Maximum size of the anyio default threadpool used by FastAPI " + "to run sync route handlers. 
Default anyio limit is 40, which " + "throttles bursts of blocking sandbox list/get/delete operations " + "under high concurrency." + ), + ) + loop: Literal["auto", "uvloop", "asyncio"] = Field( + default="auto", + description=( + "Event loop implementation. 'auto' uses uvloop when available and " + "falls back to asyncio. 'asyncio' forces the stdlib loop." + ), + ) + http: Literal["auto", "httptools", "h11"] = Field( + default="auto", + description=( + "HTTP protocol parser. 'auto' uses httptools when available and " + "falls back to h11." + ), + ) api_key: Optional[str] = Field( default=None, description="Global API key for authenticating incoming lifecycle API calls.", diff --git a/server/opensandbox_server/examples/example.config.k8s.toml b/server/opensandbox_server/examples/example.config.k8s.toml index 5329c7381..5f397e9f4 100644 --- a/server/opensandbox_server/examples/example.config.k8s.toml +++ b/server/opensandbox_server/examples/example.config.k8s.toml @@ -32,7 +32,7 @@ level = "INFO" [runtime] type = "kubernetes" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" [storage] # Allowlist of host path prefixes permitted for bind mounts. @@ -75,7 +75,7 @@ batchsandbox_template_file = "~/batchsandbox-template.yaml" mode = "direct" [egress] -image = "opensandbox/egress:v1.0.11" +image = "opensandbox/egress:v1.0.12" mode = "dns" # Default is true (recommended for dual-stack CNI). Set false only if you need IPv6 in the netns (see server/configuration.md). 
# disable_ipv6 = false diff --git a/server/opensandbox_server/examples/example.config.k8s.zh.toml b/server/opensandbox_server/examples/example.config.k8s.zh.toml index afad730ab..a1b7cd81c 100644 --- a/server/opensandbox_server/examples/example.config.k8s.zh.toml +++ b/server/opensandbox_server/examples/example.config.k8s.zh.toml @@ -32,7 +32,7 @@ level = "INFO" [runtime] type = "kubernetes" -execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15" +execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18" [storage] # 允许进行 bind mount 的宿主机路径前缀白名单。 @@ -76,7 +76,7 @@ batchsandbox_template_file = "~/batchsandbox-template.yaml" mode = "direct" [egress] -image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.11" +image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.12" mode = "dns" # Default is true (recommended for dual-stack CNI). Set false only if you need IPv6 in the netns (see server/configuration.md). # disable_ipv6 = false diff --git a/server/opensandbox_server/examples/example.config.toml b/server/opensandbox_server/examples/example.config.toml index 251f8d534..9f509392a 100644 --- a/server/opensandbox_server/examples/example.config.toml +++ b/server/opensandbox_server/examples/example.config.toml @@ -32,7 +32,7 @@ level = "INFO" [runtime] type = "docker" -execd_image = "opensandbox/execd:v1.0.15" +execd_image = "opensandbox/execd:v1.0.18" [storage] # Allowlist of host path prefixes permitted for bind mounts. @@ -63,7 +63,7 @@ seccomp_profile = "" mode = "direct" [egress] -image = "opensandbox/egress:v1.0.11" +image = "opensandbox/egress:v1.0.12" mode = "dns" # 🧪 [EXPERIMENTAL] Renew-on-access. Off by default — see server/README.md. 
diff --git a/server/opensandbox_server/examples/example.config.zh.toml b/server/opensandbox_server/examples/example.config.zh.toml index 3585895ab..689eeda27 100644 --- a/server/opensandbox_server/examples/example.config.zh.toml +++ b/server/opensandbox_server/examples/example.config.zh.toml @@ -32,7 +32,7 @@ level = "INFO" [runtime] type = "docker" -execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.15" +execd_image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/execd:v1.0.18" [storage] allowed_host_paths = [] @@ -61,7 +61,7 @@ seccomp_profile = "" mode = "direct" [egress] -image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.11" +image = "sandbox-registry.cn-zhangjiakou.cr.aliyuncs.com/opensandbox/egress:v1.0.12" mode = "dns" # 🧪 [EXPERIMENTAL] 按访问续期。默认关闭 — 见 server/README_zh.md。 diff --git a/server/opensandbox_server/main.py b/server/opensandbox_server/main.py index ba0004f67..f33633ed7 100644 --- a/server/opensandbox_server/main.py +++ b/server/opensandbox_server/main.py @@ -61,6 +61,10 @@ async def lifespan(app: FastAPI): logger.error("API key startup confirmation failed: %s", exc) os._exit(1) + from anyio.to_thread import current_default_thread_limiter + + current_default_thread_limiter().total_tokens = app_config.server.thread_pool_size + app.state.http_client = httpx.AsyncClient(timeout=180.0) # Validate secure runtime configuration at startup @@ -204,4 +208,6 @@ async def health_check(): reload=True, log_config=_log_config, timeout_keep_alive=app_config.server.timeout_keep_alive, + loop=app_config.server.loop, + http=app_config.server.http, ) diff --git a/server/opensandbox_server/services/docker/container_ops.py b/server/opensandbox_server/services/docker/container_ops.py index 88fc4e7bd..d1da88a5b 100644 --- a/server/opensandbox_server/services/docker/container_ops.py +++ b/server/opensandbox_server/services/docker/container_ops.py @@ -329,7 +329,7 @@ def _resolve_image_auth( 
def _resolve_resource_limits( self, request: CreateSandboxRequest ) -> tuple[Optional[int], Optional[int], Optional[int]]: - resource_limits = request.resource_limits.root or {} + resource_limits = (request.resource_limits.root if request.resource_limits else None) or {} mem_limit = parse_memory_limit(resource_limits.get("memory")) nano_cpus = parse_nano_cpus(resource_limits.get("cpu")) gpu_count = parse_gpu_request(resource_limits.get("gpu")) diff --git a/server/opensandbox_server/services/docker/docker_service.py b/server/opensandbox_server/services/docker/docker_service.py index c02264e5c..6a7c8a05f 100644 --- a/server/opensandbox_server/services/docker/docker_service.py +++ b/server/opensandbox_server/services/docker/docker_service.py @@ -607,6 +607,14 @@ async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxRe Raises: HTTPException: If sandbox creation fails """ + if (request.extensions or {}).get("poolRef", "").strip(): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={ + "code": "SANDBOX::UNSUPPORTED_POOL_REF", + "message": "poolRef is not supported by the Docker provider. Use Kubernetes BatchSandbox provider instead.", + }, + ) request = resolve_sandbox_image_from_request(request) ensure_entrypoint(request.entrypoint or []) ensure_metadata_labels(request.metadata) @@ -761,7 +769,7 @@ def _provision_sandbox( requested_windows_profile = is_windows_platform(request.platform) if requested_windows_profile: - validate_windows_resource_limits(request.resource_limits.root or {}) + validate_windows_resource_limits((request.resource_limits.root if request.resource_limits else None) or {}) validate_windows_runtime_prerequisites() # Prepare OSSFS mounts first so binds can reference mounted host paths. 
@@ -855,7 +863,7 @@ def _provision_sandbox( ) environment = inject_windows_resource_limits_env( environment, - request.resource_limits.root or {}, + (request.resource_limits.root if request.resource_limits else None) or {}, ) environment = inject_windows_user_ports(environment, exposed_ports) diff --git a/server/opensandbox_server/services/k8s/batchsandbox_provider.py b/server/opensandbox_server/services/k8s/batchsandbox_provider.py index fba4f6d1e..d9efea549 100644 --- a/server/opensandbox_server/services/k8s/batchsandbox_provider.py +++ b/server/opensandbox_server/services/k8s/batchsandbox_provider.py @@ -40,6 +40,7 @@ apply_egress_to_spec, ) from opensandbox_server.services.k8s.provider_common import ( + DEFAULT_ENTRYPOINT, _build_execd_init_container, _build_main_container, _container_to_dict, @@ -317,7 +318,7 @@ def _apply_platform_node_selector( template_spec=template_spec if isinstance(template_spec, dict) else {}, platform=platform, ) - + def _create_workload_from_pool( self, batchsandbox_name: str, @@ -333,8 +334,10 @@ def _create_workload_from_pool( spec: Dict[str, Any] = { "replicas": 1, "poolRef": pool_ref, - "taskTemplate": self._build_task_template(entrypoint, env), } + needs_task_template = env or entrypoint != DEFAULT_ENTRYPOINT + if needs_task_template: + spec["taskTemplate"] = self._build_task_template(entrypoint, env) if expires_at is not None: spec["expireTime"] = expires_at.isoformat() runtime_manifest = { diff --git a/server/opensandbox_server/services/k8s/client.py b/server/opensandbox_server/services/k8s/client.py index ad9774927..72f39a686 100644 --- a/server/opensandbox_server/services/k8s/client.py +++ b/server/opensandbox_server/services/k8s/client.py @@ -27,6 +27,7 @@ from opensandbox_server.config import KubernetesRuntimeConfig from opensandbox_server.services.k8s.informer import WorkloadInformer +from opensandbox_server.services.k8s.label_selector import matches, parse_selector from opensandbox_server.services.k8s.rate_limiter import 
TokenBucketRateLimiter logger = logging.getLogger(__name__) @@ -87,6 +88,16 @@ def get_node_v1_api(self) -> NodeV1Api: return self._node_v1_api + def _lookup_informer(self, group: str, version: str, plural: str, namespace: str) -> Optional[WorkloadInformer]: + """Return an existing informer without starting one. Used by write paths + to invalidate cache entries; never auto-create on writes since list paths + own the lazy-start contract.""" + if not self.config.informer_enabled: + return None + key: _InformerKey = (group, version, plural, namespace) + with self._informers_lock: + return self._informers.get(key) + def _get_informer(self, group: str, version: str, plural: str, namespace: str) -> Optional[WorkloadInformer]: """Return the informer for this resource+namespace, starting it lazily.""" if not self.config.informer_enabled: @@ -130,13 +141,17 @@ def create_custom_object( """Create a namespaced custom resource.""" if self._write_limiter: self._write_limiter.acquire() - return self.get_custom_objects_api().create_namespaced_custom_object( + obj = self.get_custom_objects_api().create_namespaced_custom_object( group=group, version=version, namespace=namespace, plural=plural, body=body, ) + informer = self._lookup_informer(group, version, plural, namespace) + if informer: + informer.update_cache(obj) + return obj def get_custom_object( self, @@ -183,7 +198,25 @@ def list_custom_objects( plural: str, label_selector: str = "", ) -> List[Dict[str, Any]]: - """List namespaced custom resources, returning the items list.""" + """List namespaced custom resources, returning the items list. + + Tries the informer cache first when available, synced, and the label + selector falls within the supported in-memory grammar. Falls back to + a direct API call (with rate limiting) otherwise. 
+ """ + informer = self._get_informer(group, version, plural, namespace) + if informer and informer.has_synced: + terms = parse_selector(label_selector) + if terms is not None: + cached = informer.list() + if not terms: + return cached + return [ + obj + for obj in cached + if matches(obj.get("metadata", {}).get("labels") or {}, terms) + ] + if self._read_limiter: self._read_limiter.acquire() try: @@ -220,6 +253,9 @@ def delete_custom_object( name=name, grace_period_seconds=grace_period_seconds, ) + informer = self._lookup_informer(group, version, plural, namespace) + if informer: + informer.delete_from_cache(name) def patch_custom_object( self, @@ -233,7 +269,7 @@ def patch_custom_object( """Patch a namespaced custom resource.""" if self._write_limiter: self._write_limiter.acquire() - return self.get_custom_objects_api().patch_namespaced_custom_object( + obj = self.get_custom_objects_api().patch_namespaced_custom_object( group=group, version=version, namespace=namespace, @@ -241,6 +277,10 @@ def patch_custom_object( name=name, body=body, ) + informer = self._lookup_informer(group, version, plural, namespace) + if informer: + informer.update_cache(obj) + return obj # ------------------------------------------------------------------ # PersistentVolumeClaim operations diff --git a/server/opensandbox_server/services/k8s/informer.py b/server/opensandbox_server/services/k8s/informer.py index 5eec36349..38b440ac6 100644 --- a/server/opensandbox_server/services/k8s/informer.py +++ b/server/opensandbox_server/services/k8s/informer.py @@ -16,7 +16,7 @@ import logging import threading -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Dict, List, Optional from kubernetes import watch from kubernetes.client import ApiException @@ -85,6 +85,11 @@ def get(self, name: str) -> Optional[Dict[str, Any]]: with self._lock: return self._cache.get(name) + def list(self) -> List[Dict[str, Any]]: + """Return a snapshot of every cached object.""" + with 
self._lock: + return list(self._cache.values()) + def update_cache(self, obj: Dict[str, Any]) -> None: """Upsert a single object into the cache. @@ -100,6 +105,11 @@ def update_cache(self, obj: Dict[str, Any]) -> None: self._cache[name] = obj self._advance_resource_version(metadata.get("resourceVersion")) + def delete_from_cache(self, name: str) -> None: + """Evict a single object from the cache by name.""" + with self._lock: + self._cache.pop(name, None) + def _advance_resource_version(self, rv: Optional[str]) -> None: """Advance ``_resource_version`` only when *rv* is strictly newer. diff --git a/server/opensandbox_server/services/k8s/kubernetes_service.py b/server/opensandbox_server/services/k8s/kubernetes_service.py index 28a9fd361..4c674efc9 100644 --- a/server/opensandbox_server/services/k8s/kubernetes_service.py +++ b/server/opensandbox_server/services/k8s/kubernetes_service.py @@ -191,7 +191,8 @@ async def _wait_for_sandbox_ready( while time.time() - start_time < timeout_seconds: try: - workload = self.workload_provider.get_workload( + workload = await asyncio.to_thread( + self.workload_provider.get_workload, sandbox_id=sandbox_id, namespace=self.namespace, ) @@ -264,7 +265,7 @@ def _ensure_image_auth_support(self, request: CreateSandboxRequest) -> None: Raises HTTP 400 if the provider does not support per-request image auth. 
""" - if request.image.auth is None: + if request.image is None or request.image.auth is None: return if self.workload_provider.supports_image_auth(): return @@ -404,8 +405,11 @@ async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxRe Raises: HTTPException: If creation fails, timeout, or invalid parameters """ - request = resolve_sandbox_image_from_request(request) - ensure_entrypoint(request.entrypoint or []) + has_pool_ref = bool((request.extensions or {}).get("poolRef", "").strip()) + + if not has_pool_ref: + request = resolve_sandbox_image_from_request(request) + ensure_entrypoint(request.entrypoint or []) ensure_metadata_labels(request.metadata) ensure_platform_valid(request.platform) ensure_timeout_within_limit( @@ -440,10 +444,11 @@ async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxRe # Auto-create PVCs that don't exist yet if request.volumes: - self._ensure_pvc_volumes(request.volumes) + await asyncio.to_thread(self._ensure_pvc_volumes, request.volumes) # Create workload - workload_info = self.workload_provider.create_workload( + workload_info = await asyncio.to_thread( + self.workload_provider.create_workload, sandbox_id=sandbox_id, namespace=self.namespace, image_spec=request.image, @@ -499,7 +504,11 @@ async def create_sandbox(self, request: CreateSandboxRequest) -> CreateSandboxRe except HTTPException as e: try: logger.error(f"Creation failed, cleaning up sandbox {sandbox_id}: {e}") - self.workload_provider.delete_workload(sandbox_id, self.namespace) + await asyncio.to_thread( + self.workload_provider.delete_workload, + sandbox_id, + self.namespace, + ) except Exception as cleanup_ex: logger.error(f"Failed to cleanup sandbox {sandbox_id}", exc_info=cleanup_ex) raise @@ -788,21 +797,25 @@ def patch_sandbox_metadata(self, sandbox_id: str, patch: PatchSandboxMetadataReq new_labels = self._apply_metadata_patch(labels, patch) + # JSON merge patch (RFC 7396) on metadata.labels treats keys absent + # from 
the body as kept. To delete a label we must send the key with + # an explicit null. Build the merge body from the desired final labels + # plus null markers for keys removed by this patch. + label_patch: Dict[str, Optional[str]] = dict(new_labels) + for key, value in patch.items(): + if value is None: + label_patch[key] = None + try: - self.workload_provider.patch_labels( + updated = self.workload_provider.patch_labels( name=name, namespace=self.namespace, - labels=new_labels, + labels=label_patch, ) except Exception as e: logger.error("Error patching labels for sandbox %s: %s", sandbox_id, e) raise _build_k8s_api_error("patch sandbox labels", e) from e - updated = _get_workload_or_404( - self.workload_provider, - self.namespace, - sandbox_id, - ) return _build_sandbox_from_workload(updated, self.workload_provider) def get_endpoint( diff --git a/server/opensandbox_server/services/k8s/label_selector.py b/server/opensandbox_server/services/k8s/label_selector.py new file mode 100644 index 000000000..437bf9fc8 --- /dev/null +++ b/server/opensandbox_server/services/k8s/label_selector.py @@ -0,0 +1,93 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Minimal Kubernetes label selector parser for in-memory matching. + +Supports only the subset that callers in this codebase actually emit: + +- empty string ............ matches every object +- ``key`` ................. key existence +- ``key=value`` ........... 
equality (``==`` accepted as alias) +- ``a=1,b=2`` ............. comma-joined AND of the above + +When the selector contains anything outside this grammar (set-based ops +like ``in``, ``notin``, ``!key``), :func:`parse_selector` returns ``None`` +so the caller falls back to issuing a real Kubernetes API list request. +""" + +from __future__ import annotations + +from typing import List, Literal, Mapping, Optional, Tuple + +Op = Literal["exists", "eq"] +Term = Tuple[str, Op, Optional[str]] + + +_LABEL_KEY_CHARS = set( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./" +) + + +def _is_valid_key(key: str) -> bool: + if not key: + return False + return all(c in _LABEL_KEY_CHARS for c in key) + + +def parse_selector(selector: str) -> Optional[List[Term]]: + """Parse a label selector into a list of AND terms. + + Returns ``None`` when the selector uses syntax beyond what this minimal + parser supports. The empty selector parses to ``[]`` (match-all). + """ + selector = (selector or "").strip() + if not selector: + return [] + + terms: List[Term] = [] + for raw in selector.split(","): + clause = raw.strip() + if not clause: + return None + + if "==" in clause: + key, _, value = clause.partition("==") + elif "=" in clause: + key, _, value = clause.partition("=") + else: + key, value = clause, None + + key = key.strip() + if not _is_valid_key(key): + return None + if value is None: + terms.append((key, "exists", None)) + else: + terms.append((key, "eq", value.strip())) + + return terms + + +def matches(labels: Mapping[str, str], terms: List[Term]) -> bool: + """Return True if ``labels`` satisfy every AND term.""" + for key, op, expected in terms: + if op == "exists": + if key not in labels: + return False + elif op == "eq": + if labels.get(key) != expected: + return False + else: # pragma: no cover - exhaustive on Op + return False + return True diff --git a/server/opensandbox_server/services/k8s/provider_common.py 
b/server/opensandbox_server/services/k8s/provider_common.py index 38d5bddf8..645056a57 100644 --- a/server/opensandbox_server/services/k8s/provider_common.py +++ b/server/opensandbox_server/services/k8s/provider_common.py @@ -36,6 +36,9 @@ serialize_security_context_to_dict, ) +# Default entrypoint auto-filled by the SDK when user does not provide one. +DEFAULT_ENTRYPOINT = ["tail", "-f", "/dev/null"] + _GPU_RESOURCE_LIMIT_KEY = "gpu" # Canonical extended-resource name advertised by the NVIDIA device plugin. # Hardcoded for parity with the Docker runtime fix (#775), which targets diff --git a/server/opensandbox_server/services/k8s/windows_profile.py b/server/opensandbox_server/services/k8s/windows_profile.py index ccd65cac2..e7b6e1bc9 100644 --- a/server/opensandbox_server/services/k8s/windows_profile.py +++ b/server/opensandbox_server/services/k8s/windows_profile.py @@ -14,9 +14,12 @@ from __future__ import annotations +import math +import re from typing import Any, Dict, List, Optional from opensandbox_server.api.schema import PlatformSpec +from opensandbox_server.services.k8s.provider_common import DEFAULT_ENTRYPOINT from opensandbox_server.services.windows_common import ( inject_windows_resource_limits_env, inject_windows_user_ports, @@ -26,7 +29,11 @@ WINDOWS_OEM_VOLUME_NAME = "opensandbox-win-oem" WINDOWS_KVM_VOLUME_NAME = "opensandbox-win-kvm" WINDOWS_TUN_VOLUME_NAME = "opensandbox-win-tun" +WINDOWS_STORAGE_VOLUME_NAME = "opensandbox-win-storage" WINDOWS_PROFILE_DEFAULT_USER_PORTS = ["44772", "8080", "3389/tcp", "3389/udp", "8006/tcp"] +# Extra memory overhead (in Gi) reserved for QEMU process on top of guest RAM. 
+WINDOWS_QEMU_MEMORY_OVERHEAD_GI = 2 +_SIZE_PATTERN = re.compile(r"^\s*(\d+)\s*([a-zA-Z]*)\s*$") def is_windows_profile(platform: Optional[PlatformSpec]) -> bool: @@ -90,10 +97,36 @@ def apply_windows_profile_overrides( ) main_container = containers[0] - main_container["command"] = list(entrypoint) + # Entrypoint handling for Windows profile: + # - If user provides a custom entrypoint, use it as container command + # (e.g. for ENI network hack or other custom startup logic). + # - If no entrypoint or the SDK default, remove command to use image + # ENTRYPOINT (dockur/windows starts QEMU via /run/entry.sh). + if entrypoint and entrypoint != DEFAULT_ENTRYPOINT: + main_container["command"] = entrypoint + else: + main_container.pop("command", None) + main_container.pop("args", None) main_container["env"] = windows_env if windows_env else None - main_container.pop("resources", None) + # Set pod resources from resource_limits for proper K8s scheduling. + # Memory includes overhead for the QEMU process itself. 
+ if resource_limits: + limits: Dict[str, str] = {} + if resource_limits.get("cpu"): + limits["cpu"] = resource_limits["cpu"] + if resource_limits.get("memory"): + limits["memory"] = _memory_with_qemu_overhead(resource_limits["memory"]) + if limits: + main_container["resources"] = { + "limits": limits, + "requests": dict(limits), + } + else: + main_container.pop("resources", None) + else: + main_container.pop("resources", None) security_context = main_container.setdefault("securityContext", {}) + security_context["privileged"] = True capabilities = security_context.setdefault("capabilities", {}) drop = capabilities.get("drop") if isinstance(drop, list): @@ -110,6 +143,7 @@ def apply_windows_profile_overrides( {"name": WINDOWS_OEM_VOLUME_NAME, "mountPath": "/oem"}, {"name": WINDOWS_KVM_VOLUME_NAME, "mountPath": "/dev/kvm"}, {"name": WINDOWS_TUN_VOLUME_NAME, "mountPath": "/dev/net/tun"}, + {"name": WINDOWS_STORAGE_VOLUME_NAME, "mountPath": "/storage"}, ], ) @@ -125,9 +159,14 @@ def apply_windows_profile_overrides( "name": WINDOWS_TUN_VOLUME_NAME, "hostPath": {"path": "/dev/net/tun", "type": "CharDevice"}, }, + {"name": WINDOWS_STORAGE_VOLUME_NAME, "emptyDir": {}}, ], ) + # dockur/windows relies on container restart to complete multi-phase + # installation (first boot installs from ISO, second boot runs from disk). + pod_spec["restartPolicy"] = "Always" + def apply_windows_profile_arch_selector( pod_spec: Dict[str, Any], @@ -183,6 +222,29 @@ def _merge_volume_mounts(container: Dict[str, Any], mounts_to_add: List[Dict[str existing_names.add(name) +def _memory_with_qemu_overhead(memory_value: str) -> str: + """Add QEMU process overhead to guest memory for K8s pod resource limits. + + Parses the guest RAM value (e.g. '8G', '16Gi') and adds + WINDOWS_QEMU_MEMORY_OVERHEAD_GI. Returns a Gi-suffixed string suitable + for Kubernetes resource quantities. 
+ """ + match = _SIZE_PATTERN.match(memory_value) + if not match: + return memory_value + amount = int(match.group(1)) + unit = (match.group(2) or "").lower() + if unit in {"g", "gi", "gb"}: + total_gi = amount + WINDOWS_QEMU_MEMORY_OVERHEAD_GI + elif unit in {"m", "mi", "mb"}: + total_gi = math.ceil(amount / 1024) + WINDOWS_QEMU_MEMORY_OVERHEAD_GI + elif unit in {"t", "ti", "tb"}: + total_gi = amount * 1024 + WINDOWS_QEMU_MEMORY_OVERHEAD_GI + else: + return memory_value + return f"{total_gi}Gi" + + def _merge_volumes(pod_spec: Dict[str, Any], volumes_to_add: List[Dict[str, Any]]) -> None: volumes = pod_spec.setdefault("volumes", []) if not isinstance(volumes, list): diff --git a/server/opensandbox_server/services/k8s/workload_mapper.py b/server/opensandbox_server/services/k8s/workload_mapper.py index 6a7297c3d..40e1310cf 100644 --- a/server/opensandbox_server/services/k8s/workload_mapper.py +++ b/server/opensandbox_server/services/k8s/workload_mapper.py @@ -84,10 +84,12 @@ def _build_sandbox_from_workload(workload: Any, workload_provider: Any) -> Sandb def _extract_platform_from_workload(workload: Any) -> Optional[PlatformSpec]: if isinstance(workload, dict): - spec = workload.get("spec", {}) + spec = workload.get("spec") or {} + template = spec.get("template") or {} + pod_template = spec.get("podTemplate") or {} pod_spec = ( - spec.get("template", {}).get("spec") - or spec.get("podTemplate", {}).get("spec") + (template.get("spec") if isinstance(template, dict) else None) + or (pod_template.get("spec") if isinstance(pod_template, dict) else None) or {} ) else: diff --git a/server/opensandbox_server/services/k8s/workload_provider.py b/server/opensandbox_server/services/k8s/workload_provider.py index c85b8f567..40bfdba6d 100644 --- a/server/opensandbox_server/services/k8s/workload_provider.py +++ b/server/opensandbox_server/services/k8s/workload_provider.py @@ -202,10 +202,16 @@ def resume_sandbox(self, sandbox_id: str, namespace: str) -> None: """ raise 
NotImplementedError("Resume is not supported by this provider") - def patch_labels(self, name: str, namespace: str, labels: Dict[str, str]) -> None: - """Patch workload metadata.labels via JSON merge patch.""" + def patch_labels( + self, name: str, namespace: str, labels: Dict[str, Optional[str]] + ) -> Dict[str, Any]: + """Patch workload metadata.labels via JSON merge patch. + + A None value for a label key deletes that label per RFC 7396. + Returns the API server response (the patched workload). + """ body = {"metadata": {"labels": labels}} - self.k8s_client.patch_custom_object( + return self.k8s_client.patch_custom_object( group=self.group, version=self.version, namespace=namespace, diff --git a/server/pyproject.toml b/server/pyproject.toml index 5bf44ab97..36b6f523f 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -51,7 +51,7 @@ dependencies = [ "pydantic-settings", "pyyaml", "tomli; python_version < \"3.11\"", - "uvicorn", + "uvicorn[standard]", "websockets>=14.0", ] diff --git a/server/tests/k8s/test_agent_sandbox_provider.py b/server/tests/k8s/test_agent_sandbox_provider.py index bfc138f2a..5f69b7e33 100644 --- a/server/tests/k8s/test_agent_sandbox_provider.py +++ b/server/tests/k8s/test_agent_sandbox_provider.py @@ -772,7 +772,7 @@ def test_create_workload_with_network_policy_adds_sidecar(self, mock_k8s_client) expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -785,7 +785,7 @@ def test_create_workload_with_network_policy_adds_sidecar(self, mock_k8s_client) # Find sidecar container sidecar = next((c for c in containers if c["name"] == "egress"), None) assert sidecar is not None - assert sidecar["image"] == "opensandbox/egress:v1.0.11" + assert sidecar["image"] == "opensandbox/egress:v1.0.12" # Verify sidecar has environment variable env_vars = 
{e["name"]: e["value"] for e in sidecar.get("env", [])} @@ -822,7 +822,7 @@ def test_create_workload_with_network_policy_persists_annotation_and_sidecar_tok expires_at=None, execd_image="execd:latest", network_policy=NetworkPolicy(default_action="deny", egress=[]), - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", annotations={SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token"}, egress_auth_token="egress-token", ) @@ -854,7 +854,7 @@ def test_create_workload_with_egress_mode_dns_nft(self, mock_k8s_client): expires_at=None, execd_image="execd:latest", network_policy=NetworkPolicy(default_action="deny", egress=[]), - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", egress_mode=EGRESS_MODE_DNS_NFT, ) @@ -891,7 +891,7 @@ def test_create_workload_with_network_policy_does_not_add_pod_ipv6_sysctls(self, expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -931,7 +931,7 @@ def test_create_workload_with_egress_skips_ipv6_disable_when_not_configured(self expires_at=None, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -964,7 +964,7 @@ def test_create_workload_with_network_policy_drops_net_admin_from_main_container expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1041,7 +1041,7 @@ def test_egress_sidecar_contains_network_policy_in_env(self, mock_k8s_client): expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, 
- egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] diff --git a/server/tests/k8s/test_batchsandbox_provider.py b/server/tests/k8s/test_batchsandbox_provider.py index 452d76ab6..15aa51a34 100644 --- a/server/tests/k8s/test_batchsandbox_provider.py +++ b/server/tests/k8s/test_batchsandbox_provider.py @@ -198,7 +198,9 @@ def test_create_workload_windows_profile_uses_windows_runtime_shape(self, mock_k main_container = pod_spec["containers"][0] assert main_container["command"] == ["cmd", "/c", "echo hello"] - assert "resources" not in main_container + # Resources include QEMU memory overhead (8G + 2Gi overhead = 10Gi) + assert main_container["resources"]["limits"]["cpu"] == "4" + assert main_container["resources"]["limits"]["memory"] == "10Gi" env_dict = {item["name"]: item["value"] for item in main_container.get("env", [])} assert env_dict["VERSION"] == "11" @@ -212,6 +214,33 @@ def test_create_workload_windows_profile_uses_windows_runtime_shape(self, mock_k assert "opensandbox-win-kvm" in volume_names assert "opensandbox-win-tun" in volume_names + def test_create_workload_windows_profile_default_entrypoint_uses_image_entrypoint(self, mock_k8s_client): + """When entrypoint is the SDK default, command is removed so image ENTRYPOINT runs.""" + provider = BatchSandboxProvider(mock_k8s_client) + mock_k8s_client.create_custom_object.return_value = { + "metadata": {"name": "test-id", "uid": "test-uid"} + } + + provider.create_workload( + sandbox_id="test-id", + namespace="test-ns", + image_spec=ImageSpec(uri="dockurr/windows:latest"), + entrypoint=["tail", "-f", "/dev/null"], + env={"VERSION": "11"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + labels={"opensandbox.io/id": "test-id"}, + expires_at=None, + execd_image="execd:latest", + platform=PlatformSpec(os="windows", arch="amd64"), + ) + + body = 
mock_k8s_client.create_custom_object.call_args.kwargs["body"] + pod_spec = body["spec"]["template"]["spec"] + main_container = pod_spec["containers"][0] + # No command set - image default ENTRYPOINT will be used + assert "command" not in main_container + assert "args" not in main_container + def test_create_workload_windows_profile_merges_user_ports(self, mock_k8s_client): provider = BatchSandboxProvider(mock_k8s_client) mock_k8s_client.create_custom_object.return_value = { @@ -1514,6 +1543,57 @@ def test_create_workload_poolref_builds_correct_manifest(self, mock_k8s_client): # Verify no template field (pool-based doesn't use template) assert "template" not in body["spec"] + def test_create_workload_poolref_default_entrypoint_no_env_omits_task_template(self, mock_k8s_client): + """When entrypoint is SDK default and env is empty, taskTemplate is omitted.""" + provider = BatchSandboxProvider(mock_k8s_client) + mock_k8s_client.create_custom_object.return_value = { + "metadata": {"name": "test-id", "uid": "test-uid"} + } + + provider.create_workload( + sandbox_id="test-id", + namespace="test-ns", + image_spec=ImageSpec(uri="dockurr/windows:latest"), + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={}, + labels={}, + expires_at=None, + execd_image="execd:latest", + extensions={"poolRef": "my-pool"}, + ) + + body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] + assert body["spec"]["poolRef"] == "my-pool" + assert "taskTemplate" not in body["spec"] + + def test_create_workload_poolref_default_entrypoint_with_env_includes_task_template(self, mock_k8s_client): + """When entrypoint is SDK default but env is non-empty, taskTemplate is generated.""" + provider = BatchSandboxProvider(mock_k8s_client) + mock_k8s_client.create_custom_object.return_value = { + "metadata": {"name": "test-id", "uid": "test-uid"} + } + + provider.create_workload( + sandbox_id="test-id", + namespace="test-ns", + image_spec=ImageSpec(uri="dockurr/windows:latest"), 
+ entrypoint=["tail", "-f", "/dev/null"], + env={"VERSION": "11"}, + resource_limits={}, + labels={}, + expires_at=None, + execd_image="execd:latest", + extensions={"poolRef": "my-pool"}, + ) + + body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] + assert body["spec"]["poolRef"] == "my-pool" + assert "taskTemplate" in body["spec"] + task_template = body["spec"]["taskTemplate"] + assert task_template["spec"]["process"]["env"] == [{"name": "VERSION", "value": "11"}] + + class TestBatchSandboxProviderEgress: """BatchSandboxProvider egress sidecar tests""" @@ -1575,7 +1655,7 @@ def test_create_workload_with_network_policy_adds_sidecar(self, mock_k8s_client) expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1588,7 +1668,7 @@ def test_create_workload_with_network_policy_adds_sidecar(self, mock_k8s_client) # Find sidecar container sidecar = next((c for c in containers if c["name"] == "egress"), None) assert sidecar is not None - assert sidecar["image"] == "opensandbox/egress:v1.0.11" + assert sidecar["image"] == "opensandbox/egress:v1.0.12" # Verify sidecar has environment variable env_vars = {e["name"]: e["value"] for e in sidecar.get("env", [])} @@ -1629,7 +1709,7 @@ def test_create_workload_windows_profile_with_network_policy_keeps_ipv6_disable( execd_image="execd:latest", platform=PlatformSpec(os="windows", arch="amd64"), network_policy=NetworkPolicy(default_action="deny", egress=[]), - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1666,7 +1746,7 @@ def test_create_workload_with_network_policy_persists_annotation_and_sidecar_tok expires_at=None, execd_image="execd:latest", network_policy=NetworkPolicy(default_action="deny", egress=[]), - 
egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", annotations={SANDBOX_EGRESS_AUTH_TOKEN_METADATA_KEY: "egress-token"}, egress_auth_token="egress-token", ) @@ -1698,7 +1778,7 @@ def test_create_workload_with_egress_mode_dns_nft(self, mock_k8s_client): expires_at=None, execd_image="execd:latest", network_policy=NetworkPolicy(default_action="deny", egress=[]), - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", egress_mode=EGRESS_MODE_DNS_NFT, ) @@ -1736,7 +1816,7 @@ def test_create_workload_with_network_policy_does_not_add_pod_ipv6_sysctls(self, expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1776,7 +1856,7 @@ def test_create_workload_with_egress_skips_ipv6_disable_when_not_configured(self expires_at=None, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1809,7 +1889,7 @@ def test_create_workload_with_network_policy_drops_net_admin_from_main_container expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1886,7 +1966,7 @@ def test_egress_sidecar_contains_network_policy_in_env(self, mock_k8s_client): expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] @@ -1971,7 +2051,7 @@ def test_create_workload_with_network_policy_works_with_template(self, 
mock_k8s_ expires_at=expires_at, execd_image="execd:latest", network_policy=network_policy, - egress_image="opensandbox/egress:v1.0.11", + egress_image="opensandbox/egress:v1.0.12", ) body = mock_k8s_client.create_custom_object.call_args.kwargs["body"] diff --git a/server/tests/k8s/test_egress_helper.py b/server/tests/k8s/test_egress_helper.py index ead2ccead..29aba8a12 100644 --- a/server/tests/k8s/test_egress_helper.py +++ b/server/tests/k8s/test_egress_helper.py @@ -47,7 +47,7 @@ class TestEgressSidecarViaApply: def test_builds_container_with_basic_config(self): """Test that container is built with correct basic configuration.""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[ @@ -64,7 +64,7 @@ def test_builds_container_with_basic_config(self): def test_contains_egress_rules_environment_variable(self): """Test that container includes OPENSANDBOX_EGRESS_RULES environment variable.""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[NetworkRule(action="allow", target="example.com")], @@ -80,7 +80,7 @@ def test_contains_egress_rules_environment_variable(self): assert env_vars[1]["value"] == EGRESS_MODE_DNS def test_contains_egress_token_when_provided(self): - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[NetworkRule(action="allow", target="example.com")], @@ -97,7 +97,7 @@ def test_contains_egress_token_when_provided(self): assert env_vars[EGRESS_MODE_ENV] == EGRESS_MODE_DNS def test_egress_mode_dns_nft(self): - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[NetworkRule(action="allow", target="example.com")], @@ -114,7 +114,7 @@ def 
test_egress_mode_dns_nft(self): def test_serializes_network_policy_correctly(self): """Test that network policy is correctly serialized to JSON.""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[ @@ -139,7 +139,7 @@ def test_serializes_network_policy_correctly(self): def test_handles_empty_egress_rules(self): """Test that empty egress rules are handled correctly.""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="allow", egress=[], @@ -155,7 +155,7 @@ def test_handles_empty_egress_rules(self): def test_handles_missing_default_action(self): """Test that missing default_action is handled (exclude_none=True).""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( egress=[NetworkRule(action="allow", target="example.com")], ) @@ -170,7 +170,7 @@ def test_handles_missing_default_action(self): def test_security_context_adds_net_admin_not_privileged(self): """Egress sidecar uses NET_ADMIN only (IPv6 is disabled in execd init when egress is on).""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[], @@ -184,14 +184,14 @@ def test_security_context_adds_net_admin_not_privileged(self): def test_no_command_uses_image_entrypoint(self): container = _egress_container( - "opensandbox/egress:v1.0.11", + "opensandbox/egress:v1.0.12", NetworkPolicy(default_action="deny", egress=[]), ) assert "command" not in container def test_container_spec_is_valid_kubernetes_format(self): """Test that returned container spec is in valid Kubernetes format.""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", 
egress=[NetworkRule(action="allow", target="example.com")], @@ -212,7 +212,7 @@ def test_container_spec_is_valid_kubernetes_format(self): def test_handles_wildcard_domains(self): """Test that wildcard domains in egress rules are handled correctly.""" - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" network_policy = NetworkPolicy( default_action="deny", egress=[ @@ -254,7 +254,7 @@ def test_adds_egress_sidecar_container(self): default_action="deny", egress=[NetworkRule(action="allow", target="example.com")], ) - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" apply_egress_to_spec( containers, @@ -273,7 +273,7 @@ def test_does_not_touch_unrelated_pod_state(self): default_action="deny", egress=[NetworkRule(action="allow", target="example.com")], ) - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" apply_egress_to_spec( containers, @@ -298,7 +298,7 @@ def test_preserves_existing_pod_sysctls_when_not_passed_in(self): default_action="deny", egress=[NetworkRule(action="allow", target="example.com")], ) - egress_image = "opensandbox/egress:v1.0.11" + egress_image = "opensandbox/egress:v1.0.12" apply_egress_to_spec( containers, @@ -320,7 +320,7 @@ def test_no_op_when_no_network_policy(self): apply_egress_to_spec( containers, None, - "opensandbox/egress:v1.0.11", + "opensandbox/egress:v1.0.12", ) assert len(containers) == 0 diff --git a/server/tests/k8s/test_k8s_client.py b/server/tests/k8s/test_k8s_client.py index 3c8d36dbc..dadf3e773 100644 --- a/server/tests/k8s/test_k8s_client.py +++ b/server/tests/k8s/test_k8s_client.py @@ -144,6 +144,50 @@ def test_create_custom_object_delegates_to_api(self, k8s_runtime_config): group="g", version="v1", namespace="ns", plural="foos", body=body ) + def test_create_custom_object_updates_informer_cache(self, k8s_runtime_config): + """create_custom_object upserts the new object into an existing informer cache.""" + 
c = self._make_client(k8s_runtime_config) + created = {"metadata": {"name": "foo-1", "resourceVersion": "11"}} + c._custom_objects_api.create_namespaced_custom_object.return_value = created + fake_informer = MagicMock() + c._informers[("g", "v1", "foos", "ns")] = fake_informer + c.config = MagicMock(informer_enabled=True, read_qps=0.0, write_qps=0.0) + result = c.create_custom_object("g", "v1", "ns", "foos", {"metadata": {"name": "foo-1"}}) + assert result == created + fake_informer.update_cache.assert_called_once_with(created) + + def test_patch_custom_object_updates_informer_cache(self, k8s_runtime_config): + """patch_custom_object upserts the patched object into an existing informer cache.""" + c = self._make_client(k8s_runtime_config) + patched = {"metadata": {"name": "foo-1", "resourceVersion": "12"}} + c._custom_objects_api.patch_namespaced_custom_object.return_value = patched + fake_informer = MagicMock() + c._informers[("g", "v1", "foos", "ns")] = fake_informer + c.config = MagicMock(informer_enabled=True, read_qps=0.0, write_qps=0.0) + result = c.patch_custom_object("g", "v1", "ns", "foos", "foo-1", {"spec": {"x": 1}}) + assert result == patched + fake_informer.update_cache.assert_called_once_with(patched) + + def test_delete_custom_object_evicts_informer_cache(self, k8s_runtime_config): + """delete_custom_object removes the object from an existing informer cache.""" + c = self._make_client(k8s_runtime_config) + fake_informer = MagicMock() + c._informers[("g", "v1", "foos", "ns")] = fake_informer + c.config = MagicMock(informer_enabled=True, read_qps=0.0, write_qps=0.0) + c.delete_custom_object("g", "v1", "ns", "foos", "foo-1") + fake_informer.delete_from_cache.assert_called_once_with("foo-1") + + def test_write_paths_skip_cache_when_no_informer(self, k8s_runtime_config): + """Write paths must not crash when no informer has been started yet.""" + c = self._make_client(k8s_runtime_config) + c._custom_objects_api.create_namespaced_custom_object.return_value 
= {"metadata": {"name": "x"}} + c._custom_objects_api.patch_namespaced_custom_object.return_value = {"metadata": {"name": "x"}} + c.config = MagicMock(informer_enabled=True, read_qps=0.0, write_qps=0.0) + # No informers registered → _lookup_informer returns None + c.create_custom_object("g", "v1", "ns", "foos", {"metadata": {"name": "x"}}) + c.patch_custom_object("g", "v1", "ns", "foos", "x", {}) + c.delete_custom_object("g", "v1", "ns", "foos", "x") + def test_get_custom_object_returns_none_on_404(self, k8s_runtime_config): """get_custom_object returns None when the API raises a 404.""" c = self._make_client(k8s_runtime_config) @@ -233,6 +277,100 @@ def test_list_custom_objects_reraises_non_404(self, k8s_runtime_config): with pytest.raises(ApiException): c.list_custom_objects("g", "v1", "ns", "foos") + def _attach_synced_informer(self, c, items): + fake_informer = MagicMock() + fake_informer.has_synced = True + fake_informer.list.return_value = list(items) + c._informers[("g", "v1", "foos", "ns")] = fake_informer + c.config = MagicMock( + informer_enabled=True, + informer_resync_seconds=300, + informer_watch_timeout_seconds=60, + read_qps=0.0, + write_qps=0.0, + ) + return fake_informer + + def test_list_custom_objects_returns_cached_when_synced(self, k8s_runtime_config): + """When the informer is synced, list_custom_objects serves from cache.""" + c = self._make_client(k8s_runtime_config) + items = [ + {"metadata": {"name": "a", "labels": {"opensandbox.io/id": "a"}}}, + {"metadata": {"name": "b", "labels": {"opensandbox.io/id": "b"}}}, + ] + self._attach_synced_informer(c, items) + result = c.list_custom_objects("g", "v1", "ns", "foos") + assert result == items + c._custom_objects_api.list_namespaced_custom_object.assert_not_called() + + def test_list_custom_objects_filters_cached_by_label_existence( + self, k8s_runtime_config + ): + """Bare-key selector filters cached items in memory without an API call.""" + c = self._make_client(k8s_runtime_config) + items = [ 
+ {"metadata": {"name": "with-id", "labels": {"opensandbox.io/id": "x"}}}, + {"metadata": {"name": "no-id", "labels": {"other": "y"}}}, + ] + self._attach_synced_informer(c, items) + result = c.list_custom_objects( + "g", "v1", "ns", "foos", label_selector="opensandbox.io/id" + ) + assert [obj["metadata"]["name"] for obj in result] == ["with-id"] + c._custom_objects_api.list_namespaced_custom_object.assert_not_called() + + def test_list_custom_objects_filters_cached_by_equality(self, k8s_runtime_config): + """key=value selector filters cached items in memory without an API call.""" + c = self._make_client(k8s_runtime_config) + items = [ + {"metadata": {"name": "alpha", "labels": {"team": "infra"}}}, + {"metadata": {"name": "beta", "labels": {"team": "data"}}}, + ] + self._attach_synced_informer(c, items) + result = c.list_custom_objects( + "g", "v1", "ns", "foos", label_selector="team=infra" + ) + assert [obj["metadata"]["name"] for obj in result] == ["alpha"] + c._custom_objects_api.list_namespaced_custom_object.assert_not_called() + + def test_list_custom_objects_falls_back_when_informer_unsynced( + self, k8s_runtime_config + ): + """Cache miss when has_synced=False routes to direct API.""" + c = self._make_client(k8s_runtime_config) + fake_informer = MagicMock() + fake_informer.has_synced = False + c._informers[("g", "v1", "foos", "ns")] = fake_informer + c.config = MagicMock( + informer_enabled=True, + informer_resync_seconds=300, + informer_watch_timeout_seconds=60, + read_qps=0.0, + write_qps=0.0, + ) + c._custom_objects_api.list_namespaced_custom_object.return_value = { + "items": [{"metadata": {"name": "z"}}] + } + result = c.list_custom_objects("g", "v1", "ns", "foos") + assert [obj["metadata"]["name"] for obj in result] == ["z"] + fake_informer.list.assert_not_called() + c._custom_objects_api.list_namespaced_custom_object.assert_called_once() + + def test_list_custom_objects_falls_back_on_unsupported_selector( + self, k8s_runtime_config + ): + 
"""Set-based selectors (in/notin) bypass the cache parser and hit the API.""" + c = self._make_client(k8s_runtime_config) + self._attach_synced_informer(c, [{"metadata": {"name": "x"}}]) + c._custom_objects_api.list_namespaced_custom_object.return_value = { + "items": [{"metadata": {"name": "from-api"}}] + } + result = c.list_custom_objects( + "g", "v1", "ns", "foos", label_selector="env in (prod, staging)" + ) + assert [obj["metadata"]["name"] for obj in result] == ["from-api"] + c._custom_objects_api.list_namespaced_custom_object.assert_called_once() + def test_delete_custom_object_delegates_to_api(self, k8s_runtime_config): """delete_custom_object forwards arguments to the raw API.""" c = self._make_client(k8s_runtime_config) diff --git a/server/tests/k8s/test_k8s_windows_profile.py b/server/tests/k8s/test_k8s_windows_profile.py new file mode 100644 index 000000000..c16ee108f --- /dev/null +++ b/server/tests/k8s/test_k8s_windows_profile.py @@ -0,0 +1,249 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Unit tests for K8s windows_profile module.""" + +import pytest + +from opensandbox_server.services.k8s.windows_profile import ( + _memory_with_qemu_overhead, + apply_windows_profile_overrides, + build_windows_profile_env, +) + + +class TestMemoryWithQemuOverhead: + """Tests for _memory_with_qemu_overhead helper.""" + + @pytest.mark.parametrize( + ("input_value", "expected"), + [ + ("8G", "10Gi"), + ("16G", "18Gi"), + ("4Gi", "6Gi"), + ("8Gb", "10Gi"), + ], + ) + def test_gigabyte_units(self, input_value, expected): + assert _memory_with_qemu_overhead(input_value) == expected + + @pytest.mark.parametrize( + ("input_value", "expected"), + [ + ("8192M", "10Gi"), # 8192/1024 = 8, + 2 = 10 + ("8192Mi", "10Gi"), + ("4096Mb", "6Gi"), # 4096/1024 = 4, + 2 = 6 + ("1000Mi", "3Gi"), # ceil(1000/1024) = 1, + 2 = 3 + ], + ) + def test_megabyte_units(self, input_value, expected): + assert _memory_with_qemu_overhead(input_value) == expected + + @pytest.mark.parametrize( + ("input_value", "expected"), + [ + ("1T", "1026Gi"), # 1*1024 + 2 = 1026 + ("1Ti", "1026Gi"), + ], + ) + def test_terabyte_units(self, input_value, expected): + assert _memory_with_qemu_overhead(input_value) == expected + + def test_unrecognized_unit_returns_original(self): + assert _memory_with_qemu_overhead("8K") == "8K" + assert _memory_with_qemu_overhead("8Ki") == "8Ki" + + def test_unparseable_value_returns_original(self): + assert _memory_with_qemu_overhead("invalid") == "invalid" + assert _memory_with_qemu_overhead("") == "" + + def test_whitespace_tolerance(self): + assert _memory_with_qemu_overhead(" 8 G ") == "10Gi" + + +class TestBuildWindowsProfileEnv: + """Tests for build_windows_profile_env.""" + + def test_does_not_inject_kvm_n_by_default(self): + result = build_windows_profile_env( + env={"VERSION": "11"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + env_dict = {item["name"]: item["value"] for item in result} + assert "KVM" not in env_dict + + def 
test_preserves_user_kvm_override(self): + result = build_windows_profile_env( + env={"VERSION": "11", "KVM": "N"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + env_dict = {item["name"]: item["value"] for item in result} + assert env_dict["KVM"] == "N" + + def test_includes_user_env_and_resource_derived_env(self): + result = build_windows_profile_env( + env={"VERSION": "11", "LANGUAGE": "Chinese"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + env_dict = {item["name"]: item["value"] for item in result} + assert env_dict["VERSION"] == "11" + assert env_dict["LANGUAGE"] == "Chinese" + assert env_dict["CPU_CORES"] == "4" + assert env_dict["RAM_SIZE"] == "8G" + assert env_dict["DISK_SIZE"] == "64G" + + +class TestApplyWindowsProfileOverrides: + """Tests for apply_windows_profile_overrides entrypoint and resource handling.""" + + def _make_pod_spec(self): + return { + "initContainers": [ + { + "name": "execd-installer", + "image": "execd:test", + "command": ["/bin/sh", "-c"], + "args": ["cp ./execd /opt/opensandbox/bin/execd"], + "volumeMounts": [ + {"name": "opensandbox-bin", "mountPath": "/opt/opensandbox/bin"} + ], + } + ], + "containers": [ + { + "name": "sandbox", + "image": "dockurr/windows:latest", + "command": ["/opt/opensandbox/bin/bootstrap.sh", "tail", "-f", "/dev/null"], + "env": [{"name": "EXECD", "value": "/opt/opensandbox/bin/execd"}], + "volumeMounts": [ + {"name": "opensandbox-bin", "mountPath": "/opt/opensandbox/bin"} + ], + } + ], + "volumes": [{"name": "opensandbox-bin", "emptyDir": {}}], + } + + def test_custom_entrypoint_sets_command(self): + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["/bin/sh", "-c", "patch && exec /run/entry.sh"], + env={"VERSION": "11"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + main = pod_spec["containers"][0] + assert main["command"] == ["/bin/sh", "-c", "patch && exec /run/entry.sh"] + assert 
"args" not in main + + def test_default_entrypoint_removes_command(self): + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={"VERSION": "11"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + main = pod_spec["containers"][0] + assert "command" not in main + assert "args" not in main + + def test_empty_entrypoint_removes_command(self): + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=[], + env={"VERSION": "11"}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + main = pod_spec["containers"][0] + assert "command" not in main + + def test_resource_limits_sets_resources_with_overhead(self): + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={"cpu": "4", "memory": "8G", "disk": "64G"}, + ) + main = pod_spec["containers"][0] + assert main["resources"]["limits"]["cpu"] == "4" + assert main["resources"]["limits"]["memory"] == "10Gi" + assert main["resources"]["requests"]["cpu"] == "4" + assert main["resources"]["requests"]["memory"] == "10Gi" + + def test_empty_resource_limits_removes_resources(self): + pod_spec = self._make_pod_spec() + pod_spec["containers"][0]["resources"] = {"limits": {"cpu": "1"}} + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={}, + ) + main = pod_spec["containers"][0] + assert "resources" not in main + + def test_resource_limits_with_only_disk_removes_resources(self): + """disk is not a K8s resource, so if only disk is present, no limits are set.""" + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={"disk": "64G"}, + ) + main = pod_spec["containers"][0] + assert "resources" 
not in main + + def test_sets_privileged_true(self): + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={"cpu": "4", "memory": "8G"}, + ) + main = pod_spec["containers"][0] + assert main["securityContext"]["privileged"] is True + + def test_sets_restart_policy_always(self): + pod_spec = self._make_pod_spec() + pod_spec["restartPolicy"] = "Never" + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={"cpu": "4", "memory": "8G"}, + ) + assert pod_spec["restartPolicy"] == "Always" + + def test_adds_storage_volume_and_mount(self): + pod_spec = self._make_pod_spec() + apply_windows_profile_overrides( + pod_spec=pod_spec, + entrypoint=["tail", "-f", "/dev/null"], + env={}, + resource_limits={"cpu": "4", "memory": "8G"}, + ) + volume_names = [v["name"] for v in pod_spec["volumes"]] + assert "opensandbox-win-storage" in volume_names + storage_vol = next(v for v in pod_spec["volumes"] if v["name"] == "opensandbox-win-storage") + assert storage_vol == {"name": "opensandbox-win-storage", "emptyDir": {}} + + main = pod_spec["containers"][0] + mount_names = [m["name"] for m in main["volumeMounts"]] + assert "opensandbox-win-storage" in mount_names + storage_mount = next(m for m in main["volumeMounts"] if m["name"] == "opensandbox-win-storage") + assert storage_mount["mountPath"] == "/storage" diff --git a/server/tests/k8s/test_kubernetes_service.py b/server/tests/k8s/test_kubernetes_service.py index 553b93311..3f5f583fe 100644 --- a/server/tests/k8s/test_kubernetes_service.py +++ b/server/tests/k8s/test_kubernetes_service.py @@ -224,7 +224,7 @@ async def test_create_sandbox_with_network_policy_passes_egress_token_and_annota self, k8s_service, create_sandbox_request ): create_sandbox_request.network_policy = NetworkPolicy(default_action="deny", egress=[]) - k8s_service.app_config.egress = 
EgressConfig(image="opensandbox/egress:v1.0.11") + k8s_service.app_config.egress = EgressConfig(image="opensandbox/egress:v1.0.12") k8s_service.workload_provider.create_workload.return_value = { "name": "test-id", "uid": "uid-1" } @@ -298,7 +298,7 @@ async def test_create_sandbox_with_network_policy_passes_egress_mode_dns_nft_fro ): create_sandbox_request.network_policy = NetworkPolicy(default_action="deny", egress=[]) k8s_service.app_config.egress = EgressConfig( - image="opensandbox/egress:v1.0.11", + image="opensandbox/egress:v1.0.12", mode=EGRESS_MODE_DNS_NFT, ) k8s_service.workload_provider.create_workload.return_value = { @@ -525,6 +525,67 @@ async def test_create_sandbox_rejects_timeout_above_configured_maximum( assert "configured maximum of 3600s" in exc_info.value.detail["message"] k8s_service.workload_provider.create_workload.assert_not_called() + @pytest.mark.asyncio + async def test_create_sandbox_pool_mode_skips_image_and_entrypoint_validation( + self, k8s_service, mock_workload + ): + """Pool mode: poolRef only, no image/entrypoint/resourceLimits — should succeed.""" + from opensandbox_server.api.schema import CreateSandboxRequest + + pool_request = CreateSandboxRequest( + extensions={"poolRef": "my-pool"}, + ) + + k8s_service.workload_provider.create_workload.return_value = { + "name": "test-sandbox-pool", + "uid": "pool-123", + } + k8s_service.workload_provider.get_workload.return_value = mock_workload + k8s_service.workload_provider.get_status.return_value = { + "state": "Running", + "reason": "", + "message": "Pod is running", + "last_transition_at": datetime.now(timezone.utc), + } + k8s_service.workload_provider.get_endpoint_info.return_value = "10.244.0.5:8080" + k8s_service.workload_provider.get_expiration.return_value = datetime.now(timezone.utc) + timedelta(hours=1) + + response = await k8s_service.create_sandbox(pool_request) + + assert response.id is not None + assert response.status.state == "Running" + 
k8s_service.workload_provider.create_workload.assert_called_once() + + @pytest.mark.asyncio + async def test_create_sandbox_pool_mode_image_auth_guard_no_error( + self, k8s_service, mock_workload + ): + """Pool mode with image=None should not raise AttributeError in _ensure_image_auth_support.""" + from opensandbox_server.api.schema import CreateSandboxRequest + + pool_request = CreateSandboxRequest( + extensions={"poolRef": "my-pool"}, + ) + assert pool_request.image is None + + k8s_service.workload_provider.create_workload.return_value = { + "name": "test-sandbox-pool2", + "uid": "pool-456", + } + k8s_service.workload_provider.get_workload.return_value = mock_workload + k8s_service.workload_provider.get_status.return_value = { + "state": "Running", + "reason": "", + "message": "Pod is running", + "last_transition_at": datetime.now(timezone.utc), + } + k8s_service.workload_provider.get_endpoint_info.return_value = "10.244.0.6:8080" + k8s_service.workload_provider.get_expiration.return_value = datetime.now(timezone.utc) + timedelta(hours=1) + + # Should not raise AttributeError on None.auth + response = await k8s_service.create_sandbox(pool_request) + assert response.id is not None + class TestWaitForSandboxReady: """_wait_for_sandbox_ready method tests""" @@ -1236,3 +1297,62 @@ def test_signed_endpoint_different_expires_produces_different_endpoints(self, k8 ep2 = k8s_service.get_endpoint("sbx-001", 8080, expires=2000000500) assert ep1.endpoint != ep2.endpoint + + +class TestPatchSandboxMetadata: + """Verify patch_sandbox_metadata builds the JSON merge-patch body correctly + and uses the API server's PATCH response (not a cache-prone re-fetch).""" + + @staticmethod + def _workload(labels: dict) -> dict: + return { + "metadata": { + "name": "sandbox-sbx-001", + "labels": dict(labels), + "creationTimestamp": datetime(2026, 1, 1, tzinfo=timezone.utc), + }, + "spec": {}, + "status": {"conditions": []}, + } + + @staticmethod + def _stub_provider_status(k8s_service) -> 
None: + k8s_service.workload_provider.get_status.return_value = { + "state": "Running", + "reason": None, + "message": None, + "last_transition_at": None, + } + k8s_service.workload_provider.get_expiration.return_value = None + + def test_patch_body_sends_null_for_deleted_keys(self, k8s_service): + initial = {"opensandbox.io/id": "sbx-001", "team": "infra", "env": "dev"} + patched = {"opensandbox.io/id": "sbx-001", "env": "stage"} + + k8s_service.workload_provider.get_workload.return_value = self._workload(initial) + k8s_service.workload_provider.patch_labels.return_value = self._workload(patched) + self._stub_provider_status(k8s_service) + + k8s_service.patch_sandbox_metadata("sbx-001", {"env": "stage", "team": None}) + + k8s_service.workload_provider.patch_labels.assert_called_once() + body_labels = k8s_service.workload_provider.patch_labels.call_args.kwargs["labels"] + assert body_labels["env"] == "stage" + assert body_labels["team"] is None + assert body_labels["opensandbox.io/id"] == "sbx-001" + + def test_returns_sandbox_from_patch_response(self, k8s_service): + """The PATCH response is authoritative; re-reading via get_workload + could hit a stale informer cache.""" + initial = {"opensandbox.io/id": "sbx-001", "env": "dev"} + patched = {"opensandbox.io/id": "sbx-001", "env": "stage"} + + k8s_service.workload_provider.get_workload.return_value = self._workload(initial) + k8s_service.workload_provider.patch_labels.return_value = self._workload(patched) + self._stub_provider_status(k8s_service) + + sandbox = k8s_service.patch_sandbox_metadata("sbx-001", {"env": "stage"}) + + assert sandbox.metadata == {"env": "stage"} + # Pre-patch read only; no second get_workload after patch_labels. 
+ assert k8s_service.workload_provider.get_workload.call_count == 1 diff --git a/server/tests/k8s/test_label_selector.py b/server/tests/k8s/test_label_selector.py new file mode 100644 index 000000000..a7e51acfc --- /dev/null +++ b/server/tests/k8s/test_label_selector.py @@ -0,0 +1,80 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from opensandbox_server.services.k8s.label_selector import ( + matches, + parse_selector, +) + + +class TestParseSelector: + def test_empty_selector_returns_match_all_terms(self): + assert parse_selector("") == [] + assert parse_selector(" ") == [] + + def test_bare_key_parses_as_existence_term(self): + assert parse_selector("opensandbox.io/id") == [ + ("opensandbox.io/id", "exists", None) + ] + + def test_equality_parses_as_eq_term(self): + assert parse_selector("team=infra") == [("team", "eq", "infra")] + + def test_double_equals_parses_as_eq_term(self): + assert parse_selector("team==infra") == [("team", "eq", "infra")] + + def test_comma_joined_clauses_parse_as_and(self): + assert parse_selector("team=infra,project") == [ + ("team", "eq", "infra"), + ("project", "exists", None), + ] + + def test_whitespace_around_clauses_is_tolerated(self): + assert parse_selector(" team = infra , project ") == [ + ("team", "eq", "infra"), + ("project", "exists", None), + ] + + def test_set_based_operator_returns_none(self): + assert parse_selector("env in (prod, staging)") is None + + 
def test_negation_returns_none(self): + assert parse_selector("!retired") is None + + def test_inequality_returns_none(self): + assert parse_selector("team!=infra") is None + + def test_empty_clause_returns_none(self): + assert parse_selector("team=infra,") is None + assert parse_selector(",team=infra") is None + + +class TestMatches: + @pytest.mark.parametrize( + "labels,terms,expected", + [ + ({"a": "1"}, [], True), + ({}, [("a", "exists", None)], False), + ({"a": ""}, [("a", "exists", None)], True), + ({"a": "1"}, [("a", "exists", None)], True), + ({"a": "1"}, [("a", "eq", "1")], True), + ({"a": "2"}, [("a", "eq", "1")], False), + ({"a": "1", "b": "x"}, [("a", "eq", "1"), ("b", "exists", None)], True), + ({"a": "1"}, [("a", "eq", "1"), ("b", "exists", None)], False), + ], + ) + def test_matches(self, labels, terms, expected): + assert matches(labels, terms) is expected diff --git a/server/tests/k8s/test_workload_mapper.py b/server/tests/k8s/test_workload_mapper.py new file mode 100644 index 000000000..657086516 --- /dev/null +++ b/server/tests/k8s/test_workload_mapper.py @@ -0,0 +1,105 @@ +# Copyright 2026 Alibaba Group Holding Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from opensandbox_server.services.k8s.workload_mapper import ( + _extract_platform_from_workload, +) + + +class TestExtractPlatformFromWorkload: + """Regression tests for _extract_platform_from_workload. 
+ + The BatchSandbox CRD declares spec.template as an optional preserve-unknown-fields + object. In pool mode, the BatchSandbox CR is created with only ``poolRef`` and + ``taskTemplate`` under spec; the Kubernetes API server may then return the object + with ``spec.template`` explicitly set to ``None`` (because the field is part of the + schema but unset). Earlier code did ``spec.get("template", {}).get("spec")`` which + crashed in that case because the default ``{}`` is only returned when the key is + absent, not when its value is ``None``. + """ + + def test_pool_mode_workload_with_null_template_returns_none(self): + """Pool-mode BatchSandbox CR has spec.template == None; must not crash.""" + workload = { + "metadata": {"name": "sb-1", "namespace": "opensandbox-system"}, + "spec": { + "replicas": 1, + "poolRef": "pool-runc", + "template": None, # <-- this used to crash + "taskTemplate": {}, + }, + "status": {"replicas": 1, "ready": 1, "allocated": 1}, + } + # Should return None (no platform info), not raise. 
+ assert _extract_platform_from_workload(workload) is None + + def test_pool_mode_workload_without_template_key_returns_none(self): + """Pool-mode BatchSandbox CR may also omit spec.template entirely.""" + workload = { + "metadata": {"name": "sb-1"}, + "spec": { + "replicas": 1, + "poolRef": "pool-runc", + }, + } + assert _extract_platform_from_workload(workload) is None + + def test_template_mode_with_full_platform_returns_platform(self): + """Template-mode workload with nodeSelector returns the declared platform.""" + workload = { + "metadata": {"name": "sb-1"}, + "spec": { + "replicas": 1, + "template": { + "spec": { + "nodeSelector": { + "kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + }, + }, + }, + }, + } + platform = _extract_platform_from_workload(workload) + assert platform is not None + assert platform.os == "linux" + assert platform.arch == "amd64" + + def test_pod_template_alias_still_works(self): + """Some workload types use ``podTemplate`` instead of ``template``.""" + workload = { + "spec": { + "podTemplate": { + "spec": { + "nodeSelector": { + "kubernetes.io/os": "linux", + "kubernetes.io/arch": "arm64", + }, + }, + }, + }, + } + platform = _extract_platform_from_workload(workload) + assert platform is not None + assert platform.os == "linux" + assert platform.arch == "arm64" + + def test_null_spec_returns_none(self): + """spec itself being None must not crash.""" + workload = {"metadata": {"name": "sb-1"}, "spec": None} + assert _extract_platform_from_workload(workload) is None + + def test_empty_workload_returns_none(self): + workload = {} + assert _extract_platform_from_workload(workload) is None diff --git a/server/tests/test_config.py b/server/tests/test_config.py index e45821670..2dd6e6e79 100644 --- a/server/tests/test_config.py +++ b/server/tests/test_config.py @@ -164,6 +164,67 @@ def test_server_config_defaults_include_max_sandbox_timeout(): assert server_cfg.max_sandbox_timeout_seconds is None +def 
test_server_config_uvicorn_tuning_defaults(): + """ServerConfig exposes uvicorn concurrency knobs with sensible defaults.""" + server_cfg = ServerConfig() + assert server_cfg.limit_concurrency == 1024 + assert server_cfg.backlog == 2048 + assert server_cfg.thread_pool_size == 200 + assert server_cfg.loop == "auto" + assert server_cfg.http == "auto" + + +def test_server_config_uvicorn_tuning_overrides(): + server_cfg = ServerConfig( + limit_concurrency=256, + backlog=4096, + loop="uvloop", + http="httptools", + ) + assert server_cfg.limit_concurrency == 256 + assert server_cfg.backlog == 4096 + assert server_cfg.loop == "uvloop" + assert server_cfg.http == "httptools" + + +def test_server_config_limit_concurrency_zero_disables_cap(): + """0 is the TOML-friendly disable sentinel and must collapse to None so + uvicorn applies no concurrency limit.""" + cfg = ServerConfig(limit_concurrency=0) + assert cfg.limit_concurrency is None + + +def test_server_config_limit_concurrency_accepts_none_and_positive(): + cfg = ServerConfig(limit_concurrency=None) + assert cfg.limit_concurrency is None + cfg = ServerConfig(limit_concurrency=512) + assert cfg.limit_concurrency == 512 + + +def test_server_config_limit_concurrency_rejects_negative(): + with pytest.raises(ValidationError): + ServerConfig(limit_concurrency=-1) + + +def test_server_config_backlog_must_be_positive(): + with pytest.raises(ValidationError): + ServerConfig(backlog=0) + + +def test_server_config_thread_pool_size_must_be_positive(): + with pytest.raises(ValidationError): + ServerConfig(thread_pool_size=0) + cfg = ServerConfig(thread_pool_size=512) + assert cfg.thread_pool_size == 512 + + +def test_server_config_loop_and_http_reject_unknown_values(): + with pytest.raises(ValidationError): + ServerConfig(loop="trio") # type: ignore[arg-type] + with pytest.raises(ValidationError): + ServerConfig(http="hyper") # type: ignore[arg-type] + + def test_store_defaults_to_sqlite(): cfg = StoreConfig() assert cfg.type == 
"sqlite" diff --git a/server/tests/test_docker_service.py b/server/tests/test_docker_service.py index 5344f3fa5..1bde714da 100644 --- a/server/tests/test_docker_service.py +++ b/server/tests/test_docker_service.py @@ -330,6 +330,29 @@ async def test_create_sandbox_rejects_invalid_metadata(mock_docker): assert exc.value.detail["code"] == SandboxErrorCodes.INVALID_METADATA_LABEL mock_client.containers.create.assert_not_called() +@pytest.mark.asyncio +@patch("opensandbox_server.services.docker.docker_service.docker") +async def test_create_sandbox_rejects_pool_ref_on_docker(mock_docker): + mock_client = MagicMock() + mock_client.containers.list.return_value = [] + mock_docker.from_env.return_value = mock_client + + service = DockerSandboxService(config=_app_config()) + + request = CreateSandboxRequest( + image=ImageSpec(uri="python:3.11"), + entrypoint=["python"], + resourceLimits=ResourceLimits(root={}), + extensions={"poolRef": "my-pool"}, + ) + + with pytest.raises(HTTPException) as exc: + await service.create_sandbox(request) + + assert exc.value.status_code == status.HTTP_400_BAD_REQUEST + assert exc.value.detail["code"] == "SANDBOX::UNSUPPORTED_POOL_REF" + mock_client.containers.create.assert_not_called() + @pytest.mark.asyncio @patch("opensandbox_server.services.docker.docker_service.docker") async def test_create_sandbox_rejects_timeout_above_configured_maximum(mock_docker): @@ -1433,7 +1456,7 @@ async def test_create_sandbox_windows_profile_injects_runtime_defaults(mock_dock mock_docker.from_env.return_value = mock_client cfg = _app_config() - cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.15" + cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.18" cfg.docker.network_mode = "bridge" service = DockerSandboxService(config=cfg) request = CreateSandboxRequest( @@ -1516,7 +1539,7 @@ async def test_create_sandbox_windows_profile_rejects_missing_runtime_devices(mo mock_docker.from_env.return_value = mock_client cfg = _app_config() - 
cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.15" + cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.18" cfg.docker.network_mode = "bridge" service = DockerSandboxService(config=cfg) request = CreateSandboxRequest( @@ -1555,7 +1578,7 @@ async def test_create_sandbox_windows_profile_rejects_below_minimum_resource_lim mock_docker.from_env.return_value = mock_client cfg = _app_config() - cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.15" + cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.18" cfg.docker.network_mode = "bridge" service = DockerSandboxService(config=cfg) request = CreateSandboxRequest( @@ -1592,7 +1615,7 @@ async def test_create_sandbox_windows_profile_accepts_dockur_demo_like_request(m mock_docker.from_env.return_value = mock_client cfg = _app_config() - cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.15" + cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.18" cfg.docker.network_mode = "bridge" service = DockerSandboxService(config=cfg) request = CreateSandboxRequest( @@ -1646,7 +1669,7 @@ async def test_create_sandbox_windows_profile_with_network_policy_maps_windows_p mock_docker.from_env.return_value = mock_client cfg = _app_config() - cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.15" + cfg.runtime.execd_image = "ghcr.io/opensandbox/execd:v1.0.18" cfg.docker.network_mode = "bridge" cfg.egress = EgressConfig(image="opensandbox/egress:latest") service = DockerSandboxService(config=cfg) diff --git a/server/tests/test_routes_list_sandboxes.py b/server/tests/test_routes_list_sandboxes.py index 0474ffee9..753addeb3 100644 --- a/server/tests/test_routes_list_sandboxes.py +++ b/server/tests/test_routes_list_sandboxes.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import time +from concurrent.futures import ThreadPoolExecutor from datetime import datetime, timedelta, timezone from fastapi.testclient import TestClient @@ -207,3 +209,49 @@ def test_list_sandboxes_requires_api_key(client: TestClient) -> None: assert response.status_code == 401 assert response.json()["code"] == "MISSING_API_KEY" + + +def test_list_sandboxes_runs_in_threadpool_for_concurrency( + client: TestClient, + auth_headers: dict, + monkeypatch, +) -> None: + """Blocking list calls must run in the threadpool so concurrent requests + do not serialize on the event loop. With sync def routes, FastAPI offloads + the handler to anyio's threadpool; 8 calls each sleeping 200ms should + complete well under the 1.6s serial bound. + """ + sleep_seconds = 0.2 + concurrency = 8 + + class SlowService: + @staticmethod + def list_sandboxes(_request) -> ListSandboxesResponse: + time.sleep(sleep_seconds) + return ListSandboxesResponse( + items=[], + pagination=PaginationInfo( + page=1, + pageSize=20, + totalItems=0, + totalPages=0, + hasNextPage=False, + ), + ) + + monkeypatch.setattr(lifecycle, "sandbox_service", SlowService()) + + def call() -> int: + return client.get("/v1/sandboxes", headers=auth_headers).status_code + + started = time.monotonic() + with ThreadPoolExecutor(max_workers=concurrency) as pool: + statuses = list(pool.map(lambda _: call(), range(concurrency))) + elapsed = time.monotonic() - started + + assert statuses == [200] * concurrency + serial_floor = sleep_seconds * concurrency + assert elapsed < serial_floor * 0.6, ( + f"list_sandboxes serialized: elapsed={elapsed:.2f}s, " + f"serial floor={serial_floor:.2f}s (threadpool offload broken)" + ) diff --git a/server/tests/test_schema.py b/server/tests/test_schema.py index 1676b11f4..867f373f2 100644 --- a/server/tests/test_schema.py +++ b/server/tests/test_schema.py @@ -601,3 +601,61 @@ def test_request_allows_timeout_above_previous_hardcoded_limit(self): assert request.timeout == 172800 +class 
TestCreateSandboxRequestPoolMode: + """Tests for pool mode (extensions.poolRef) validation.""" + + def test_pool_mode_accepts_only_pool_ref(self): + """Happy path: poolRef only, no image/entrypoint/resourceLimits required.""" + request = CreateSandboxRequest( + extensions={"poolRef": "my-pool"}, + ) + assert request.image is None + assert request.entrypoint is None + assert request.resource_limits is None + assert request.extensions["poolRef"] == "my-pool" + + def test_pool_mode_accepts_pool_ref_with_optional_fields(self): + """poolRef with optional env/metadata/timeout should be valid.""" + request = CreateSandboxRequest( + extensions={"poolRef": "my-pool"}, + env={"KEY": "value"}, + metadata={"team": "test"}, + timeout=600, + ) + assert request.extensions["poolRef"] == "my-pool" + assert request.env == {"KEY": "value"} + + def test_pool_mode_rejects_snapshot_id_with_pool_ref(self): + """snapshotId and poolRef cannot be used together.""" + with pytest.raises(ValidationError) as exc_info: + CreateSandboxRequest( + snapshotId="snap-001", + extensions={"poolRef": "my-pool"}, + ) + errors = exc_info.value.errors() + assert any("snapshotId" in str(e) and "poolRef" in str(e) for e in errors) + + def test_resource_limits_required_without_pool_ref(self): + """Without poolRef, resourceLimits is still required (image mode).""" + with pytest.raises(ValidationError): + CreateSandboxRequest( + image=ImageSpec(uri="python:3.11"), + entrypoint=["python"], + ) + + def test_pool_mode_normalizes_blank_snapshot_id(self): + """Blank snapshotId (e.g. 
whitespace) should be normalized to None in pool mode.""" + req = CreateSandboxRequest( + extensions={"poolRef": "my-pool"}, + snapshotId=" ", + ) + assert req.snapshot_id is None + + def test_pool_mode_ignores_blank_pool_ref(self): + """Blank poolRef should not trigger pool mode.""" + with pytest.raises(ValidationError): + CreateSandboxRequest( + extensions={"poolRef": " "}, + ) + + diff --git a/server/uv.lock b/server/uv.lock index 0dc237ed0..34c7ea8f1 100644 --- a/server/uv.lock +++ b/server/uv.lock @@ -347,6 +347,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httptools" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/e5/c07e0bcf4ec8db8164e9f6738c048b2e66aabf30e7506f440c4cc6953f60/httptools-0.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:11d01b0ff1fe02c4c32d60af61a4d613b74fad069e47e06e9067758c01e9ac78", size = 204531, upload-time = "2025-10-10T03:54:20.887Z" }, + { url = "https://files.pythonhosted.org/packages/7e/4f/35e3a63f863a659f92ffd92bef131f3e81cf849af26e6435b49bd9f6f751/httptools-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d86c1e5afdc479a6fdabf570be0d3eb791df0ae727e8dbc0259ed1249998d4", size = 109408, upload-time = "2025-10-10T03:54:22.455Z" }, + { url = 
"https://files.pythonhosted.org/packages/f5/71/b0a9193641d9e2471ac541d3b1b869538a5fb6419d52fd2669fa9c79e4b8/httptools-0.7.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c8c751014e13d88d2be5f5f14fc8b89612fcfa92a9cc480f2bc1598357a23a05", size = 440889, upload-time = "2025-10-10T03:54:23.753Z" }, + { url = "https://files.pythonhosted.org/packages/eb/d9/2e34811397b76718750fea44658cb0205b84566e895192115252e008b152/httptools-0.7.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:654968cb6b6c77e37b832a9be3d3ecabb243bbe7a0b8f65fbc5b6b04c8fcabed", size = 440460, upload-time = "2025-10-10T03:54:25.313Z" }, + { url = "https://files.pythonhosted.org/packages/01/3f/a04626ebeacc489866bb4d82362c0657b2262bef381d68310134be7f40bb/httptools-0.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b580968316348b474b020edf3988eecd5d6eec4634ee6561e72ae3a2a0e00a8a", size = 425267, upload-time = "2025-10-10T03:54:26.81Z" }, + { url = "https://files.pythonhosted.org/packages/a5/99/adcd4f66614db627b587627c8ad6f4c55f18881549bab10ecf180562e7b9/httptools-0.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d496e2f5245319da9d764296e86c5bb6fcf0cf7a8806d3d000717a889c8c0b7b", size = 424429, upload-time = "2025-10-10T03:54:28.174Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/ec8fc904a8fd30ba022dfa85f3bbc64c3c7cd75b669e24242c0658e22f3c/httptools-0.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:cbf8317bfccf0fed3b5680c559d3459cccf1abe9039bfa159e62e391c7270568", size = 86173, upload-time = "2025-10-10T03:54:29.5Z" }, + { url = "https://files.pythonhosted.org/packages/9c/08/17e07e8d89ab8f343c134616d72eebfe03798835058e2ab579dcc8353c06/httptools-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:474d3b7ab469fefcca3697a10d11a32ee2b9573250206ba1e50d5980910da657", size = 206521, upload-time = "2025-10-10T03:54:31.002Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/06/c9c1b41ff52f16aee526fd10fbda99fa4787938aa776858ddc4a1ea825ec/httptools-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3c3b7366bb6c7b96bd72d0dbe7f7d5eead261361f013be5f6d9590465ea1c70", size = 110375, upload-time = "2025-10-10T03:54:31.941Z" }, + { url = "https://files.pythonhosted.org/packages/cc/cc/10935db22fda0ee34c76f047590ca0a8bd9de531406a3ccb10a90e12ea21/httptools-0.7.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:379b479408b8747f47f3b253326183d7c009a3936518cdb70db58cffd369d9df", size = 456621, upload-time = "2025-10-10T03:54:33.176Z" }, + { url = "https://files.pythonhosted.org/packages/0e/84/875382b10d271b0c11aa5d414b44f92f8dd53e9b658aec338a79164fa548/httptools-0.7.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cad6b591a682dcc6cf1397c3900527f9affef1e55a06c4547264796bbd17cf5e", size = 454954, upload-time = "2025-10-10T03:54:34.226Z" }, + { url = "https://files.pythonhosted.org/packages/30/e1/44f89b280f7e46c0b1b2ccee5737d46b3bb13136383958f20b580a821ca0/httptools-0.7.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:eb844698d11433d2139bbeeb56499102143beb582bd6c194e3ba69c22f25c274", size = 440175, upload-time = "2025-10-10T03:54:35.942Z" }, + { url = "https://files.pythonhosted.org/packages/6f/7e/b9287763159e700e335028bc1824359dc736fa9b829dacedace91a39b37e/httptools-0.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f65744d7a8bdb4bda5e1fa23e4ba16832860606fcc09d674d56e425e991539ec", size = 440310, upload-time = "2025-10-10T03:54:37.1Z" }, + { url = "https://files.pythonhosted.org/packages/b3/07/5b614f592868e07f5c94b1f301b5e14a21df4e8076215a3bccb830a687d8/httptools-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:135fbe974b3718eada677229312e97f3b31f8a9c8ffa3ae6f565bf808d5b6bcb", size = 86875, upload-time = "2025-10-10T03:54:38.421Z" }, + { url = 
"https://files.pythonhosted.org/packages/53/7f/403e5d787dc4942316e515e949b0c8a013d84078a915910e9f391ba9b3ed/httptools-0.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:38e0c83a2ea9746ebbd643bdfb521b9aa4a91703e2cd705c20443405d2fd16a5", size = 206280, upload-time = "2025-10-10T03:54:39.274Z" }, + { url = "https://files.pythonhosted.org/packages/2a/0d/7f3fd28e2ce311ccc998c388dd1c53b18120fda3b70ebb022b135dc9839b/httptools-0.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f25bbaf1235e27704f1a7b86cd3304eabc04f569c828101d94a0e605ef7205a5", size = 110004, upload-time = "2025-10-10T03:54:40.403Z" }, + { url = "https://files.pythonhosted.org/packages/84/a6/b3965e1e146ef5762870bbe76117876ceba51a201e18cc31f5703e454596/httptools-0.7.1-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2c15f37ef679ab9ecc06bfc4e6e8628c32a8e4b305459de7cf6785acd57e4d03", size = 517655, upload-time = "2025-10-10T03:54:41.347Z" }, + { url = "https://files.pythonhosted.org/packages/11/7d/71fee6f1844e6fa378f2eddde6c3e41ce3a1fb4b2d81118dd544e3441ec0/httptools-0.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7fe6e96090df46b36ccfaf746f03034e5ab723162bc51b0a4cf58305324036f2", size = 511440, upload-time = "2025-10-10T03:54:42.452Z" }, + { url = "https://files.pythonhosted.org/packages/22/a5/079d216712a4f3ffa24af4a0381b108aa9c45b7a5cc6eb141f81726b1823/httptools-0.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f72fdbae2dbc6e68b8239defb48e6a5937b12218e6ffc2c7846cc37befa84362", size = 495186, upload-time = "2025-10-10T03:54:43.937Z" }, + { url = "https://files.pythonhosted.org/packages/e9/9e/025ad7b65278745dee3bd0ebf9314934c4592560878308a6121f7f812084/httptools-0.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e99c7b90a29fd82fea9ef57943d501a16f3404d7b9ee81799d41639bdaae412c", size = 499192, upload-time = "2025-10-10T03:54:45.003Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" }, + { url = "https://files.pythonhosted.org/packages/09/8f/c77b1fcbfd262d422f12da02feb0d218fa228d52485b77b953832105bb90/httptools-0.7.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6babce6cfa2a99545c60bfef8bee0cc0545413cb0018f617c8059a30ad985de3", size = 202889, upload-time = "2025-10-10T03:54:47.089Z" }, + { url = "https://files.pythonhosted.org/packages/0a/1a/22887f53602feaa066354867bc49a68fc295c2293433177ee90870a7d517/httptools-0.7.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:601b7628de7504077dd3dcb3791c6b8694bbd967148a6d1f01806509254fb1ca", size = 108180, upload-time = "2025-10-10T03:54:48.052Z" }, + { url = "https://files.pythonhosted.org/packages/32/6a/6aaa91937f0010d288d3d124ca2946d48d60c3a5ee7ca62afe870e3ea011/httptools-0.7.1-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:04c6c0e6c5fb0739c5b8a9eb046d298650a0ff38cf42537fc372b28dc7e4472c", size = 478596, upload-time = "2025-10-10T03:54:48.919Z" }, + { url = "https://files.pythonhosted.org/packages/6d/70/023d7ce117993107be88d2cbca566a7c1323ccbaf0af7eabf2064fe356f6/httptools-0.7.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69d4f9705c405ae3ee83d6a12283dc9feba8cc6aaec671b412917e644ab4fa66", size = 473268, upload-time = "2025-10-10T03:54:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/32/4d/9dd616c38da088e3f436e9a616e1d0cc66544b8cdac405cc4e81c8679fc7/httptools-0.7.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44c8f4347d4b31269c8a9205d8a5ee2df5322b09bbbd30f8f862185bb6b05346", size = 455517, upload-time = "2025-10-10T03:54:51.066Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/3a/a6c595c310b7df958e739aae88724e24f9246a514d909547778d776799be/httptools-0.7.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:465275d76db4d554918aba40bf1cbebe324670f3dfc979eaffaa5d108e2ed650", size = 458337, upload-time = "2025-10-10T03:54:52.196Z" }, + { url = "https://files.pythonhosted.org/packages/fd/82/88e8d6d2c51edc1cc391b6e044c6c435b6aebe97b1abc33db1b0b24cd582/httptools-0.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:322d00c2068d125bd570f7bf78b2d367dad02b919d8581d7476d8b75b294e3e6", size = 85743, upload-time = "2025-10-10T03:54:53.448Z" }, + { url = "https://files.pythonhosted.org/packages/34/50/9d095fcbb6de2d523e027a2f304d4551855c2f46e0b82befd718b8b20056/httptools-0.7.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c08fe65728b8d70b6923ce31e3956f859d5e1e8548e6f22ec520a962c6757270", size = 203619, upload-time = "2025-10-10T03:54:54.321Z" }, + { url = "https://files.pythonhosted.org/packages/07/f0/89720dc5139ae54b03f861b5e2c55a37dba9a5da7d51e1e824a1f343627f/httptools-0.7.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:7aea2e3c3953521c3c51106ee11487a910d45586e351202474d45472db7d72d3", size = 108714, upload-time = "2025-10-10T03:54:55.163Z" }, + { url = "https://files.pythonhosted.org/packages/b3/cb/eea88506f191fb552c11787c23f9a405f4c7b0c5799bf73f2249cd4f5228/httptools-0.7.1-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0e68b8582f4ea9166be62926077a3334064d422cf08ab87d8b74664f8e9058e1", size = 472909, upload-time = "2025-10-10T03:54:56.056Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4a/a548bdfae6369c0d078bab5769f7b66f17f1bfaa6fa28f81d6be6959066b/httptools-0.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:df091cf961a3be783d6aebae963cc9b71e00d57fa6f149025075217bc6a55a7b", size = 470831, upload-time = "2025-10-10T03:54:57.219Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/31/14df99e1c43bd132eec921c2e7e11cda7852f65619bc0fc5bdc2d0cb126c/httptools-0.7.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f084813239e1eb403ddacd06a30de3d3e09a9b76e7894dcda2b22f8a726e9c60", size = 452631, upload-time = "2025-10-10T03:54:58.219Z" }, + { url = "https://files.pythonhosted.org/packages/22/d2/b7e131f7be8d854d48cb6d048113c30f9a46dca0c9a8b08fcb3fcd588cdc/httptools-0.7.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7347714368fb2b335e9063bc2b96f2f87a9ceffcd9758ac295f8bbcd3ffbc0ca", size = 452910, upload-time = "2025-10-10T03:54:59.366Z" }, + { url = "https://files.pythonhosted.org/packages/53/cf/878f3b91e4e6e011eff6d1fa9ca39f7eb17d19c9d7971b04873734112f30/httptools-0.7.1-cp314-cp314-win_amd64.whl", hash = "sha256:cfabda2a5bb85aa2a904ce06d974a3f30fb36cc63d7feaddec05d2050acede96", size = 88205, upload-time = "2025-10-10T03:55:00.389Z" }, +] + [[package]] name = "httpx" version = "0.28.1" @@ -436,7 +479,7 @@ dependencies = [ { name = "pyyaml" }, { name = "redis" }, { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "uvicorn" }, + { name = "uvicorn", extra = ["standard"] }, { name = "websockets" }, ] @@ -460,7 +503,7 @@ requires-dist = [ { name = "pyyaml" }, { name = "redis", specifier = ">=5" }, { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "uvicorn" }, + { name = "uvicorn", extras = ["standard"] }, { name = "websockets", specifier = ">=14.0" }, ] @@ -1003,6 +1046,164 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, ] +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" 
}, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/14/ecceb239b65adaaf7fde510aa8bd534075695d1e5f8dadfa32b5723d9cfb/uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c", size = 1343335, upload-time = "2025-10-16T22:16:11.43Z" }, + { url = "https://files.pythonhosted.org/packages/ba/ae/6f6f9af7f590b319c94532b9567409ba11f4fa71af1148cab1bf48a07048/uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792", size = 742903, upload-time = "2025-10-16T22:16:12.979Z" }, + { url = "https://files.pythonhosted.org/packages/09/bd/3667151ad0702282a1f4d5d29288fce8a13c8b6858bf0978c219cd52b231/uvloop-0.22.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac33ed96229b7790eb729702751c0e93ac5bc3bcf52ae9eccbff30da09194b86", size = 3648499, upload-time = "2025-10-16T22:16:14.451Z" }, + { url = "https://files.pythonhosted.org/packages/b3/f6/21657bb3beb5f8c57ce8be3b83f653dd7933c2fd00545ed1b092d464799a/uvloop-0.22.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:481c990a7abe2c6f4fc3d98781cc9426ebd7f03a9aaa7eb03d3bfc68ac2a46bd", size = 3700133, upload-time = "2025-10-16T22:16:16.272Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/e0/604f61d004ded805f24974c87ddd8374ef675644f476f01f1df90e4cdf72/uvloop-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a592b043a47ad17911add5fbd087c76716d7c9ccc1d64ec9249ceafd735f03c2", size = 3512681, upload-time = "2025-10-16T22:16:18.07Z" }, + { url = "https://files.pythonhosted.org/packages/bb/ce/8491fd370b0230deb5eac69c7aae35b3be527e25a911c0acdffb922dc1cd/uvloop-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1489cf791aa7b6e8c8be1c5a080bae3a672791fcb4e9e12249b05862a2ca9cec", size = 3615261, upload-time = "2025-10-16T22:16:19.596Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d5/69900f7883235562f1f50d8184bb7dd84a2fb61e9ec63f3782546fdbd057/uvloop-0.22.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9", size = 1352420, upload-time = "2025-10-16T22:16:21.187Z" }, + { url = "https://files.pythonhosted.org/packages/a8/73/c4e271b3bce59724e291465cc936c37758886a4868787da0278b3b56b905/uvloop-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77", size = 748677, upload-time = "2025-10-16T22:16:22.558Z" }, + { url = "https://files.pythonhosted.org/packages/86/94/9fb7fad2f824d25f8ecac0d70b94d0d48107ad5ece03769a9c543444f78a/uvloop-0.22.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21", size = 3753819, upload-time = "2025-10-16T22:16:23.903Z" }, + { url = "https://files.pythonhosted.org/packages/74/4f/256aca690709e9b008b7108bc85fba619a2bc37c6d80743d18abad16ee09/uvloop-0.22.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:56a2d1fae65fd82197cb8c53c367310b3eabe1bbb9fb5a04d28e3e3520e4f702", size = 3804529, upload-time = "2025-10-16T22:16:25.246Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/74/03c05ae4737e871923d21a76fe28b6aad57f5c03b6e6bfcfa5ad616013e4/uvloop-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40631b049d5972c6755b06d0bfe8233b1bd9a8a6392d9d1c45c10b6f9e9b2733", size = 3621267, upload-time = "2025-10-16T22:16:26.819Z" }, + { url = "https://files.pythonhosted.org/packages/75/be/f8e590fe61d18b4a92070905497aec4c0e64ae1761498cad09023f3f4b3e/uvloop-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:535cc37b3a04f6cd2c1ef65fa1d370c9a35b6695df735fcff5427323f2cd5473", size = 3723105, upload-time = "2025-10-16T22:16:28.252Z" }, + { url = "https://files.pythonhosted.org/packages/3d/ff/7f72e8170be527b4977b033239a83a68d5c881cc4775fca255c677f7ac5d/uvloop-0.22.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fe94b4564e865d968414598eea1a6de60adba0c040ba4ed05ac1300de402cd42", size = 1359936, upload-time = "2025-10-16T22:16:29.436Z" }, + { url = "https://files.pythonhosted.org/packages/c3/c6/e5d433f88fd54d81ef4be58b2b7b0cea13c442454a1db703a1eea0db1a59/uvloop-0.22.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:51eb9bd88391483410daad430813d982010f9c9c89512321f5b60e2cddbdddd6", size = 752769, upload-time = "2025-10-16T22:16:30.493Z" }, + { url = "https://files.pythonhosted.org/packages/24/68/a6ac446820273e71aa762fa21cdcc09861edd3536ff47c5cd3b7afb10eeb/uvloop-0.22.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:700e674a166ca5778255e0e1dc4e9d79ab2acc57b9171b79e65feba7184b3370", size = 4317413, upload-time = "2025-10-16T22:16:31.644Z" }, + { url = "https://files.pythonhosted.org/packages/5f/6f/e62b4dfc7ad6518e7eff2516f680d02a0f6eb62c0c212e152ca708a0085e/uvloop-0.22.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b5b1ac819a3f946d3b2ee07f09149578ae76066d70b44df3fa990add49a82e4", size = 4426307, upload-time = "2025-10-16T22:16:32.917Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/60/97362554ac21e20e81bcef1150cb2a7e4ffdaf8ea1e5b2e8bf7a053caa18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e047cc068570bac9866237739607d1313b9253c3051ad84738cbb095be0537b2", size = 4131970, upload-time = "2025-10-16T22:16:34.015Z" }, + { url = "https://files.pythonhosted.org/packages/99/39/6b3f7d234ba3964c428a6e40006340f53ba37993f46ed6e111c6e9141d18/uvloop-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:512fec6815e2dd45161054592441ef76c830eddaad55c8aa30952e6fe1ed07c0", size = 4296343, upload-time = "2025-10-16T22:16:35.149Z" }, + { url = "https://files.pythonhosted.org/packages/89/8c/182a2a593195bfd39842ea68ebc084e20c850806117213f5a299dfc513d9/uvloop-0.22.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:561577354eb94200d75aca23fbde86ee11be36b00e52a4eaf8f50fb0c86b7705", size = 1358611, upload-time = "2025-10-16T22:16:36.833Z" }, + { url = "https://files.pythonhosted.org/packages/d2/14/e301ee96a6dc95224b6f1162cd3312f6d1217be3907b79173b06785f2fe7/uvloop-0.22.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1cdf5192ab3e674ca26da2eada35b288d2fa49fdd0f357a19f0e7c4e7d5077c8", size = 751811, upload-time = "2025-10-16T22:16:38.275Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/654426ce265ac19e2980bfd9ea6590ca96a56f10c76e63801a2df01c0486/uvloop-0.22.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e2ea3d6190a2968f4a14a23019d3b16870dd2190cd69c8180f7c632d21de68d", size = 4288562, upload-time = "2025-10-16T22:16:39.375Z" }, + { url = "https://files.pythonhosted.org/packages/15/c0/0be24758891ef825f2065cd5db8741aaddabe3e248ee6acc5e8a80f04005/uvloop-0.22.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0530a5fbad9c9e4ee3f2b33b148c6a64d47bbad8000ea63704fa8260f4cf728e", size = 4366890, upload-time = "2025-10-16T22:16:40.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/53/8369e5219a5855869bcee5f4d317f6da0e2c669aecf0ef7d371e3d084449/uvloop-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc5ef13bbc10b5335792360623cc378d52d7e62c2de64660616478c32cd0598e", size = 4119472, upload-time = "2025-10-16T22:16:41.694Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ba/d69adbe699b768f6b29a5eec7b47dd610bd17a69de51b251126a801369ea/uvloop-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1f38ec5e3f18c8a10ded09742f7fb8de0108796eb673f30ce7762ce1b8550cad", size = 4239051, upload-time = "2025-10-16T22:16:43.224Z" }, + { url = "https://files.pythonhosted.org/packages/90/cd/b62bdeaa429758aee8de8b00ac0dd26593a9de93d302bff3d21439e9791d/uvloop-0.22.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3879b88423ec7e97cd4eba2a443aa26ed4e59b45e6b76aabf13fe2f27023a142", size = 1362067, upload-time = "2025-10-16T22:16:44.503Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f8/a132124dfda0777e489ca86732e85e69afcd1ff7686647000050ba670689/uvloop-0.22.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4baa86acedf1d62115c1dc6ad1e17134476688f08c6efd8a2ab076e815665c74", size = 752423, upload-time = "2025-10-16T22:16:45.968Z" }, + { url = "https://files.pythonhosted.org/packages/a3/94/94af78c156f88da4b3a733773ad5ba0b164393e357cc4bd0ab2e2677a7d6/uvloop-0.22.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:297c27d8003520596236bdb2335e6b3f649480bd09e00d1e3a99144b691d2a35", size = 4272437, upload-time = "2025-10-16T22:16:47.451Z" }, + { url = "https://files.pythonhosted.org/packages/b5/35/60249e9fd07b32c665192cec7af29e06c7cd96fa1d08b84f012a56a0b38e/uvloop-0.22.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c1955d5a1dd43198244d47664a5858082a3239766a839b2102a269aaff7a4e25", size = 4292101, upload-time = "2025-10-16T22:16:49.318Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/62/67d382dfcb25d0a98ce73c11ed1a6fba5037a1a1d533dcbb7cab033a2636/uvloop-0.22.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b31dc2fccbd42adc73bc4e7cdbae4fc5086cf378979e53ca5d0301838c5682c6", size = 4114158, upload-time = "2025-10-16T22:16:50.517Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/f1171b4a882a5d13c8b7576f348acfe6074d72eaf52cccef752f748d4a9f/uvloop-0.22.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:93f617675b2d03af4e72a5333ef89450dfaa5321303ede6e67ba9c9d26878079", size = 4177360, upload-time = "2025-10-16T22:16:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/79/7b/b01414f31546caf0919da80ad57cbfe24c56b151d12af68cee1b04922ca8/uvloop-0.22.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:37554f70528f60cad66945b885eb01f1bb514f132d92b6eeed1c90fd54ed6289", size = 1454790, upload-time = "2025-10-16T22:16:54.355Z" }, + { url = "https://files.pythonhosted.org/packages/d4/31/0bb232318dd838cad3fa8fb0c68c8b40e1145b32025581975e18b11fab40/uvloop-0.22.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:b76324e2dc033a0b2f435f33eb88ff9913c156ef78e153fb210e03c13da746b3", size = 796783, upload-time = "2025-10-16T22:16:55.906Z" }, + { url = "https://files.pythonhosted.org/packages/42/38/c9b09f3271a7a723a5de69f8e237ab8e7803183131bc57c890db0b6bb872/uvloop-0.22.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:badb4d8e58ee08dad957002027830d5c3b06aea446a6a3744483c2b3b745345c", size = 4647548, upload-time = "2025-10-16T22:16:57.008Z" }, + { url = "https://files.pythonhosted.org/packages/c1/37/945b4ca0ac27e3dc4952642d4c900edd030b3da6c9634875af6e13ae80e5/uvloop-0.22.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b91328c72635f6f9e0282e4a57da7470c7350ab1c9f48546c0f2866205349d21", size = 4467065, upload-time = "2025-10-16T22:16:58.206Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/cc/48d232f33d60e2e2e0b42f4e73455b146b76ebe216487e862700457fbf3c/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:daf620c2995d193449393d6c62131b3fbd40a63bf7b307a1527856ace637fe88", size = 4328384, upload-time = "2025-10-16T22:16:59.36Z" }, + { url = "https://files.pythonhosted.org/packages/e4/16/c1fd27e9549f3c4baf1dc9c20c456cd2f822dbf8de9f463824b0c0357e06/uvloop-0.22.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6cde23eeda1a25c75b2e07d39970f3374105d5eafbaab2a4482be82f272d5a5e", size = 4296730, upload-time = "2025-10-16T22:17:00.744Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/1a/206e8cf2dd86fddf939165a57b4df61607a1e0add2785f170a3f616b7d9f/watchfiles-1.1.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c", size = 407318, upload-time = "2025-10-14T15:04:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/b3/0f/abaf5262b9c496b5dad4ed3c0e799cbecb1f8ea512ecb6ddd46646a9fca3/watchfiles-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43", size = 394478, upload-time = "2025-10-14T15:04:20.297Z" }, + { url = "https://files.pythonhosted.org/packages/b1/04/9cc0ba88697b34b755371f5ace8d3a4d9a15719c07bdc7bd13d7d8c6a341/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31", size = 449894, 
upload-time = "2025-10-14T15:04:21.527Z" }, + { url = "https://files.pythonhosted.org/packages/d2/9c/eda4615863cd8621e89aed4df680d8c3ec3da6a4cf1da113c17decd87c7f/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac", size = 459065, upload-time = "2025-10-14T15:04:22.795Z" }, + { url = "https://files.pythonhosted.org/packages/84/13/f28b3f340157d03cbc8197629bc109d1098764abe1e60874622a0be5c112/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d", size = 488377, upload-time = "2025-10-14T15:04:24.138Z" }, + { url = "https://files.pythonhosted.org/packages/86/93/cfa597fa9389e122488f7ffdbd6db505b3b915ca7435ecd7542e855898c2/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d", size = 595837, upload-time = "2025-10-14T15:04:25.057Z" }, + { url = "https://files.pythonhosted.org/packages/57/1e/68c1ed5652b48d89fc24d6af905d88ee4f82fa8bc491e2666004e307ded1/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863", size = 473456, upload-time = "2025-10-14T15:04:26.497Z" }, + { url = "https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab", size = 455614, upload-time = "2025-10-14T15:04:27.539Z" }, + { url = "https://files.pythonhosted.org/packages/61/a5/3d782a666512e01eaa6541a72ebac1d3aae191ff4a31274a66b8dd85760c/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82", size = 630690, 
upload-time = "2025-10-14T15:04:28.495Z" }, + { url = "https://files.pythonhosted.org/packages/9b/73/bb5f38590e34687b2a9c47a244aa4dd50c56a825969c92c9c5fc7387cea1/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4", size = 622459, upload-time = "2025-10-14T15:04:29.491Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ac/c9bb0ec696e07a20bd58af5399aeadaef195fb2c73d26baf55180fe4a942/watchfiles-1.1.1-cp310-cp310-win32.whl", hash = "sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844", size = 272663, upload-time = "2025-10-14T15:04:30.435Z" }, + { url = "https://files.pythonhosted.org/packages/11/a0/a60c5a7c2ec59fa062d9a9c61d02e3b6abd94d32aac2d8344c4bdd033326/watchfiles-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e", size = 287453, upload-time = "2025-10-14T15:04:31.53Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f8/2c5f479fb531ce2f0564eda479faecf253d886b1ab3630a39b7bf7362d46/watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5", size = 406529, upload-time = "2025-10-14T15:04:32.899Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cd/f515660b1f32f65df671ddf6f85bfaca621aee177712874dc30a97397977/watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741", size = 394384, upload-time = "2025-10-14T15:04:33.761Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c3/28b7dc99733eab43fca2d10f55c86e03bd6ab11ca31b802abac26b23d161/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6", size = 448789, upload-time = "2025-10-14T15:04:34.679Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/24/33e71113b320030011c8e4316ccca04194bf0cbbaeee207f00cbc7d6b9f5/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f537afb3276d12814082a2e9b242bdcf416c2e8fd9f799a737990a1dbe906e5b", size = 460521, upload-time = "2025-10-14T15:04:35.963Z" }, + { url = "https://files.pythonhosted.org/packages/f4/c3/3c9a55f255aa57b91579ae9e98c88704955fa9dac3e5614fb378291155df/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2cd9e04277e756a2e2d2543d65d1e2166d6fd4c9b183f8808634fda23f17b14", size = 488722, upload-time = "2025-10-14T15:04:37.091Z" }, + { url = "https://files.pythonhosted.org/packages/49/36/506447b73eb46c120169dc1717fe2eff07c234bb3232a7200b5f5bd816e9/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5f3f58818dc0b07f7d9aa7fe9eb1037aecb9700e63e1f6acfed13e9fef648f5d", size = 596088, upload-time = "2025-10-14T15:04:38.39Z" }, + { url = "https://files.pythonhosted.org/packages/82/ab/5f39e752a9838ec4d52e9b87c1e80f1ee3ccdbe92e183c15b6577ab9de16/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bb9f66367023ae783551042d31b1d7fd422e8289eedd91f26754a66f44d5cff", size = 472923, upload-time = "2025-10-14T15:04:39.666Z" }, + { url = "https://files.pythonhosted.org/packages/af/b9/a419292f05e302dea372fa7e6fda5178a92998411f8581b9830d28fb9edb/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aebfd0861a83e6c3d1110b78ad54704486555246e542be3e2bb94195eabb2606", size = 456080, upload-time = "2025-10-14T15:04:40.643Z" }, + { url = "https://files.pythonhosted.org/packages/b0/c3/d5932fd62bde1a30c36e10c409dc5d54506726f08cb3e1d8d0ba5e2bc8db/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5fac835b4ab3c6487b5dbad78c4b3724e26bcc468e886f8ba8cc4306f68f6701", size = 629432, upload-time = "2025-10-14T15:04:41.789Z" }, + { url = 
"https://files.pythonhosted.org/packages/f7/77/16bddd9779fafb795f1a94319dc965209c5641db5bf1edbbccace6d1b3c0/watchfiles-1.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:399600947b170270e80134ac854e21b3ccdefa11a9529a3decc1327088180f10", size = 623046, upload-time = "2025-10-14T15:04:42.718Z" }, + { url = "https://files.pythonhosted.org/packages/46/ef/f2ecb9a0f342b4bfad13a2787155c6ee7ce792140eac63a34676a2feeef2/watchfiles-1.1.1-cp311-cp311-win32.whl", hash = "sha256:de6da501c883f58ad50db3a32ad397b09ad29865b5f26f64c24d3e3281685849", size = 271473, upload-time = "2025-10-14T15:04:43.624Z" }, + { url = "https://files.pythonhosted.org/packages/94/bc/f42d71125f19731ea435c3948cad148d31a64fccde3867e5ba4edee901f9/watchfiles-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:35c53bd62a0b885bf653ebf6b700d1bf05debb78ad9292cf2a942b23513dc4c4", size = 287598, upload-time = "2025-10-14T15:04:44.516Z" }, + { url = "https://files.pythonhosted.org/packages/57/c9/a30f897351f95bbbfb6abcadafbaca711ce1162f4db95fc908c98a9165f3/watchfiles-1.1.1-cp311-cp311-win_arm64.whl", hash = "sha256:57ca5281a8b5e27593cb7d82c2ac927ad88a96ed406aa446f6344e4328208e9e", size = 277210, upload-time = "2025-10-14T15:04:45.883Z" }, + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = 
"2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = "https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = "https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = "https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = "https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = "https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = 
"2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = "https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = "https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url 
= "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, + { url = "https://files.pythonhosted.org/packages/ba/4c/a888c91e2e326872fa4705095d64acd8aa2fb9c1f7b9bd0588f33850516c/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3", size = 409611, upload-time = "2025-10-14T15:06:05.809Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c7/5420d1943c8e3ce1a21c0a9330bcf7edafb6aa65d26b21dbb3267c9e8112/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2", size = 396889, upload-time = "2025-10-14T15:06:07.035Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e5/0072cef3804ce8d3aaddbfe7788aadff6b3d3f98a286fdbee9fd74ca59a7/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d", size = 451616, upload-time = "2025-10-14T15:06:08.072Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/4e/b87b71cbdfad81ad7e83358b3e447fedd281b880a03d64a760fe0a11fc2e/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b", size = 458413, upload-time = "2025-10-14T15:06:09.209Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8e/e500f8b0b77be4ff753ac94dc06b33d8f0d839377fee1b78e8c8d8f031bf/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88", size = 408250, upload-time = "2025-10-14T15:06:10.264Z" }, + { url = "https://files.pythonhosted.org/packages/bd/95/615e72cd27b85b61eec764a5ca51bd94d40b5adea5ff47567d9ebc4d275a/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336", size = 396117, upload-time = "2025-10-14T15:06:11.28Z" }, + { url = "https://files.pythonhosted.org/packages/c9/81/e7fe958ce8a7fb5c73cc9fb07f5aeaf755e6aa72498c57d760af760c91f8/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24", size = 450493, upload-time = "2025-10-14T15:06:12.321Z" }, + { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, +] + [[package]] name = "websocket-client" version = "1.9.0" diff --git a/specs/README.md b/specs/README.md index c56523504..1d04d34da 100644 --- a/specs/README.md +++ b/specs/README.md @@ -122,6 +122,7 @@ the sandbox endpoint for the egress port and then calling the sidecar endpoint d **Main Endpoints:** - `GET /policy` - Get the 
current egress policy - `PATCH /policy` - Merge new egress rules into the current policy +- `DELETE /policy` - Remove specific egress rules from the current policy by target ## Technical Features diff --git a/specs/README_zh.md b/specs/README_zh.md index f251cb0da..0f4ef1c6d 100644 --- a/specs/README_zh.md +++ b/specs/README_zh.md @@ -121,6 +121,7 @@ **主要端点:** - `GET /policy` - 获取当前 egress 策略 - `PATCH /policy` - 将新的 egress 规则合并到当前策略 +- `DELETE /policy` - 按 target 删除当前策略中的指定 egress 规则 ## 技术特性 diff --git a/specs/egress-api.yaml b/specs/egress-api.yaml index f7dce4ec5..fba36525f 100644 --- a/specs/egress-api.yaml +++ b/specs/egress-api.yaml @@ -109,6 +109,48 @@ paths: $ref: '#/components/responses/Unauthorized' '500': $ref: '#/components/responses/InternalServerError' + delete: + tags: [Policy] + summary: Delete egress rules + description: | + Remove specific egress rules from the currently enforced policy by target. + + - Accepts a list of target strings (FQDNs or wildcard domains). + - Matching rules are removed; targets not found in the current policy + are silently ignored (idempotent). + requestBody: + required: true + content: + application/json: + schema: + type: array + minItems: 1 + items: + type: string + description: FQDN or wildcard domain to remove from the policy. + example: + - bad.example.com + - "*.blocked.org" + responses: + '200': + description: Rules removed successfully. 
+ content: + application/json: + schema: + $ref: '#/components/schemas/PolicyStatusResponse' + examples: + removed: + summary: Rules removed + value: + status: ok + mode: deny_all + enforcementMode: dns + '400': + $ref: '#/components/responses/BadRequest' + '401': + $ref: '#/components/responses/Unauthorized' + '500': + $ref: '#/components/responses/InternalServerError' components: responses: BadRequest: diff --git a/specs/sandbox-lifecycle.yml b/specs/sandbox-lifecycle.yml index 52d06e8b8..9809d7d99 100644 --- a/specs/sandbox-lifecycle.yml +++ b/specs/sandbox-lifecycle.yml @@ -1155,15 +1155,22 @@ components: CreateSandboxRequest: type: object - required: [resourceLimits] description: | - Request to create a new sandbox from either a container image or a snapshot. - Exactly one of `image` or `snapshotId` must be provided. + Request to create a new sandbox from either a container image, a snapshot, + or a pre-configured pool (via `extensions.poolRef`). + + **Standard mode**: Exactly one of `image` or `snapshotId` must be provided, + and `resourceLimits` is required. When `image` is provided, `entrypoint` is required. When `snapshotId` is provided, `entrypoint` is optional. If omitted, the server defaults the sandbox entrypoint to `["tail", "-f", "/dev/null"]`. + **Pool mode**: When `extensions.poolRef` is set, the sandbox is created from + a pre-configured pool. In this case `image`, `entrypoint`, and + `resourceLimits` are all optional (defined by the Pool CRD template). + `snapshotId` must not be provided together with `poolRef`. + **Note**: API Key authentication is required via the `OPEN-SANDBOX-API-KEY` header. properties: image: @@ -1204,6 +1211,8 @@ components: $ref: '#/components/schemas/ResourceLimits' description: | Runtime resource constraints for the sandbox instance. + Required when `extensions.poolRef` is not set. + Optional when using pool mode (resource limits are defined by the Pool CRD template). 
SDK clients should provide sensible defaults (e.g., cpu: "500m", memory: "512Mi"). env: diff --git a/tests/csharp/OpenSandbox.E2ETests/SandboxE2ETests.cs b/tests/csharp/OpenSandbox.E2ETests/SandboxE2ETests.cs index 580748a83..b38daf351 100644 --- a/tests/csharp/OpenSandbox.E2ETests/SandboxE2ETests.cs +++ b/tests/csharp/OpenSandbox.E2ETests/SandboxE2ETests.cs @@ -163,7 +163,7 @@ public async Task Sandbox_Create_With_NetworkPolicy_Get_And_Patch_Egress() try { - await Task.Delay(5000); + await WaitUntilEgressBlocksAsync(policySandbox, "https://www.github.com", TimeSpan.FromSeconds(30)); var initialPolicy = await policySandbox.GetEgressPolicyAsync(); Assert.NotNull(initialPolicy); @@ -184,7 +184,7 @@ await policySandbox.PatchEgressRulesAsync(new List new() { Action = NetworkRuleAction.Allow, Target = "www.github.com" }, new() { Action = NetworkRuleAction.Deny, Target = "pypi.org" } }); - await Task.Delay(2000); + await WaitUntilEgressBlocksAsync(policySandbox, "https://pypi.org", TimeSpan.FromSeconds(30)); var patchedPolicy = await policySandbox.GetEgressPolicyAsync(); Assert.NotNull(patchedPolicy.Egress); @@ -233,7 +233,7 @@ public async Task Sandbox_Create_With_NetworkPolicy_Get_And_Patch_Egress_Via_Ser try { - await Task.Delay(5000); + await WaitUntilEgressBlocksAsync(policySandbox, "https://www.github.com", TimeSpan.FromSeconds(30)); var egressEndpoint = await policySandbox.GetEndpointAsync(Constants.DefaultEgressPort); Assert.Contains( @@ -259,7 +259,7 @@ await policySandbox.PatchEgressRulesAsync(new List new() { Action = NetworkRuleAction.Allow, Target = "www.github.com" }, new() { Action = NetworkRuleAction.Deny, Target = "pypi.org" } }); - await Task.Delay(2000); + await WaitUntilEgressBlocksAsync(policySandbox, "https://pypi.org", TimeSpan.FromSeconds(30)); var patchedPolicy = await policySandbox.GetEgressPolicyAsync(); Assert.NotNull(patchedPolicy.Egress); @@ -1112,6 +1112,35 @@ private static async Task RunWithRetryAsync(Sandbox sandbox, string c } return 
result!; } + + /// + /// Polls curl against until the egress sidecar blocks + /// it (Execution.Error becomes non-null), or the timeout elapses. NetworkPolicy + /// sidecars sometimes accept connections before iptables/proxy rules apply, + /// so a fixed sleep is flaky. + /// + private static async Task WaitUntilEgressBlocksAsync(Sandbox sandbox, string url, TimeSpan timeout) + { + var deadline = DateTime.UtcNow + timeout; + Execution? last = null; + while (DateTime.UtcNow < deadline) + { + try + { + last = await sandbox.Commands.RunAsync($"curl -I {url}"); + if (last?.Error != null) + { + return; + } + } + catch + { + // Transient SDK/SSE errors during sidecar warmup — keep polling. + } + await Task.Delay(500); + } + Assert.Fail($"Egress policy did not block {url} within {timeout} (last error={last?.Error?.ToString() ?? "null"})"); + } } public sealed class SandboxE2ETestFixture : IAsyncLifetime diff --git a/tests/go/e2e_test.go b/tests/go/e2e_test.go index bc204b0da..631c571ac 100644 --- a/tests/go/e2e_test.go +++ b/tests/go/e2e_test.go @@ -117,8 +117,16 @@ func TestE2E_FullLifecycle(t *testing.T) { } execClient := opensandbox.NewExecdClient(execdURL, execToken) - err = execClient.Ping(ctx) - require.NoError(t, err) + // This test bypasses the SDK's high-level CreateSandbox helper (which calls + // WaitUntilReady) and pings execd directly through the server-side proxy. + // The state-Running flag is satisfied as soon as the container is up, but + // execd's HTTP routes may register a few ms later and the proxy can drop + // the very first connection it sees ("connection reset by peer"). Poll + // until ping succeeds — real users go through CreateSandbox which already + // handles this. + require.Eventually(t, func() bool { + return execClient.Ping(ctx) == nil + }, 30*time.Second, 500*time.Millisecond, "execd ping never succeeded") t.Log("Execd ping: OK") // 6. 
Test Execd — run a command with SSE streaming @@ -131,7 +139,6 @@ func TestE2E_FullLifecycle(t *testing.T) { return nil }) require.NoError(t, err) - t.Logf("Command raw output (%d bytes): %q", output.Len(), output.String()) // 7. Test Execd — file operations fileInfoMap, err := execClient.GetFileInfo(ctx, "/etc/os-release") diff --git a/tests/java/src/test/java/com/alibaba/opensandbox/e2e/SandboxE2ETest.java b/tests/java/src/test/java/com/alibaba/opensandbox/e2e/SandboxE2ETest.java index 5c51e0d5f..ba0449f27 100644 --- a/tests/java/src/test/java/com/alibaba/opensandbox/e2e/SandboxE2ETest.java +++ b/tests/java/src/test/java/com/alibaba/opensandbox/e2e/SandboxE2ETest.java @@ -270,11 +270,10 @@ void testSandboxCreateWithNetworkPolicy() { .readyTimeout(Duration.ofSeconds(60)) .networkPolicy(networkPolicy) .build(); - // Wait for NetworkPolicy sidecar to be fully initialized - try { - Thread.sleep(2000); - } catch (InterruptedException ignored) { - } + // Wait for NetworkPolicy sidecar to be fully initialized. + // The sidecar may accept the sandbox before iptables/proxy rules apply, + // so poll a denied target until the policy actually blocks it. + waitUntilEgressBlocks(policySandbox, "https://www.github.com", Duration.ofSeconds(30)); try { NetworkPolicy initialPolicy = policySandbox.getEgressPolicy(); @@ -319,10 +318,8 @@ void testSandboxCreateWithNetworkPolicy() { .target("pypi.org") .build())); - try { - Thread.sleep(2000); - } catch (InterruptedException ignored) { - } + // Poll until the patched rule takes effect (pypi now blocked). 
+ waitUntilEgressBlocks(policySandbox, "https://pypi.org", Duration.ofSeconds(30)); NetworkPolicy patchedPolicy = policySandbox.getEgressPolicy(); assertNotNull(patchedPolicy); @@ -393,10 +390,8 @@ void testSandboxCreateWithNetworkPolicyViaServerProxy() { .readyTimeout(Duration.ofSeconds(60)) .networkPolicy(networkPolicy) .build(); - try { - Thread.sleep(2000); - } catch (InterruptedException ignored) { - } + // Wait for NetworkPolicy sidecar/iptables rules to be active. + waitUntilEgressBlocks(policySandbox, "https://www.github.com", Duration.ofSeconds(30)); try { SandboxEndpoint egressEndpoint = policySandbox.getEndpoint(18080); @@ -447,10 +442,8 @@ void testSandboxCreateWithNetworkPolicyViaServerProxy() { .target("pypi.org") .build())); - try { - Thread.sleep(2000); - } catch (InterruptedException ignored) { - } + // Poll until patched rule applied (pypi now blocked). + waitUntilEgressBlocks(policySandbox, "https://pypi.org", Duration.ofSeconds(30)); NetworkPolicy patchedPolicy = policySandbox.getEgressPolicy(); assertNotNull(patchedPolicy.getEgress()); @@ -1597,4 +1590,33 @@ private Execution runWithRetry(Sandbox sandbox, String command, int maxAttempts, } return result; } + + /** + * Polls the sandbox running curl until the given URL is blocked by the + * network policy. Returns once curl reports an error (egress active), or + * fails the test if the timeout elapses. + */ + private void waitUntilEgressBlocks(Sandbox sandbox, String url, Duration timeout) { + long deadline = System.currentTimeMillis() + timeout.toMillis(); + Execution last = null; + while (System.currentTimeMillis() < deadline) { + try { + last = sandbox.commands().run( + RunCommandRequest.builder().command("curl -I " + url).build()); + if (last != null && last.getError() != null) { + return; + } + } catch (Exception ignored) { + // Transient SDK/SSE errors during sidecar warmup — keep polling. 
+ } + try { + Thread.sleep(500); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + break; + } + } + fail("Egress policy did not block " + url + " within " + timeout + + " (last execution error=" + (last == null ? "null" : last.getError()) + ")"); + } } diff --git a/tests/python/tests/test_sandbox_e2e.py b/tests/python/tests/test_sandbox_e2e.py index 0eb331d7c..3815ed74f 100644 --- a/tests/python/tests/test_sandbox_e2e.py +++ b/tests/python/tests/test_sandbox_e2e.py @@ -462,6 +462,82 @@ async def test_01aa_network_policy_get_and_patch(self): pass await sandbox.close() + @pytest.mark.timeout(180) + @pytest.mark.order(1) + async def test_01ac_network_policy_delete(self): + if is_kubernetes_runtime(): + pytest.skip("Network policy is not covered in the Kubernetes runtime suite") + + logger.info("=" * 80) + logger.info("TEST 1ac: networkPolicy delete (async)") + logger.info("=" * 80) + + cfg = create_connection_config() + sandbox = await Sandbox.create( + image=SandboxImageSpec(get_sandbox_image()), + resource=get_e2e_sandbox_resource(), + connection_config=cfg, + timeout=timedelta(minutes=5), + ready_timeout=timedelta(seconds=30), + network_policy=NetworkPolicy( + defaultAction="deny", + egress=[ + NetworkRule(action="allow", target="pypi.org"), + NetworkRule(action="allow", target="www.github.com"), + ], + ), + ) + try: + await asyncio.sleep(5) + + # Baseline: both targets reachable under deny-default policy. + initial_policy = await sandbox.get_egress_policy() + assert initial_policy.egress is not None + assert any(r.target == "pypi.org" and r.action == "allow" for r in initial_policy.egress) + assert any( + r.target == "www.github.com" and r.action == "allow" for r in initial_policy.egress + ) + pypi_ok = await sandbox.commands.run("curl -I https://pypi.org") + assert pypi_ok.error is None + github_ok = await sandbox.commands.run("curl -I https://www.github.com") + assert github_ok.error is None + + # Delete the github allow-rule. 
Include a non-existent target to + # confirm DELETE is idempotent (no error, silently ignored). + await sandbox.delete_egress_rules(["www.github.com", "nonexistent.example.com"]) + await asyncio.sleep(2) + + deleted_policy = await sandbox.get_egress_policy() + assert deleted_policy.egress is not None + assert not any( + r.target == "www.github.com" for r in deleted_policy.egress + ), "www.github.com rule should be removed" + assert any( + r.target == "pypi.org" and r.action == "allow" for r in deleted_policy.egress + ), "pypi.org rule should remain (other targets untouched)" + assert deleted_policy.default_action == "deny", "defaultAction must be preserved" + + # github now falls under default-deny; pypi still allowed. + github_blocked = await sandbox.commands.run("curl -I https://www.github.com") + assert github_blocked.error is not None + pypi_still_ok = await sandbox.commands.run("curl -I https://pypi.org") + assert pypi_still_ok.error is None + + # Second delete of the same target is a no-op. + await sandbox.delete_egress_rules(["www.github.com"]) + await asyncio.sleep(1) + unchanged_policy = await sandbox.get_egress_policy() + assert unchanged_policy.egress is not None + assert {r.target for r in unchanged_policy.egress} == { + r.target for r in deleted_policy.egress + } + finally: + try: + await sandbox.kill() + except Exception: + pass + await sandbox.close() + @pytest.mark.timeout(240) @pytest.mark.order(1) async def test_01ab_network_policy_get_and_patch_with_server_proxy(self):