OpenHands · hieptl · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/frontend/src/__tests__/components/automations/detail/run-status-badge.test.tsx b/frontend/src/__tests__/components/automations/detail/run-status-badge.test.tsx
@@ -25,4 +25,11 @@ describe("RunStatusBadge", () => {
     render(<RunStatusBadge status={AutomationRunStatus.RUNNING} />);
     expect(screen.getByText("AUTOMATIONS$DETAIL$RUNNING")).toBeInTheDocument();
   });
+
+  it("renders skipped/limit-reached label for skipped status", () => {
+    render(<RunStatusBadge status={AutomationRunStatus.SKIPPED} />);
+    expect(
+      screen.getByText("AUTOMATIONS$DETAIL$SKIPPED_LIMIT_REACHED"),
+    ).toBeInTheDocument();
+  });
 });
diff --git a/frontend/src/components/automations/detail/run-status-badge.tsx b/frontend/src/components/automations/detail/run-status-badge.tsx
@@ -30,6 +30,10 @@ const statusConfig: Record<
     label: I18nKey.AUTOMATIONS$DETAIL$RUNNING,
     style: "border-border bg-surface-elevated text-content-muted",
   },
+  [AutomationRunStatus.SKIPPED]: {
+    label: I18nKey.AUTOMATIONS$DETAIL$SKIPPED_LIMIT_REACHED,
+    style: "border-border bg-surface-elevated text-content-muted",
+  },
 };
 
 function StatusIcon({ status }: { status: AutomationRunStatus }) {

diff --git a/frontend/src/i18n/translation.json b/frontend/src/i18n/translation.json
@@ -696,6 +696,23 @@
     "tr": "Zamanlama",
     "uk": "Розклад"
   },
+  "AUTOMATIONS$DETAIL$SKIPPED_LIMIT_REACHED": {
+    "en": "Skipped – Limit Reached",
+    "ja": "スキップ – 上限に到達",
+    "zh-CN": "已跳过 – 已达上限",
+    "zh-TW": "已略過 – 已達上限",
+    "ko-KR": "건너뜀 – 한도 도달",
+    "no": "Hoppet over – grense nådd",
+    "ar": "تم التخطّي – تم بلوغ الحد",
+    "de": "Übersprungen – Limit erreicht",
+    "fr": "Ignoré – limite atteinte",
+    "it": "Saltato – limite raggiunto",
+    "pt": "Ignorado – limite atingido",
+    "es": "Omitido – límite alcanzado",
+    "ca": "Omès – límit assolit",
+    "tr": "Atlandı – sınıra ulaşıldı",
+    "uk": "Пропущено – ліміт досягнуто"
+  },
   "AUTOMATIONS$DETAIL$SUCCESSFUL": {
     "en": "Successful",
     "ja": "成功",

diff --git a/frontend/src/types/automation.ts b/frontend/src/types/automation.ts
@@ -34,6 +34,7 @@ export enum AutomationRunStatus {
   RUNNING = "RUNNING",
   COMPLETED = "COMPLETED",
   FAILED = "FAILED",
+  SKIPPED = "SKIPPED",
 }
 
 export interface AutomationRun {

diff --git a/openhands/automation/backends/cloud.py b/openhands/automation/backends/cloud.py
@@ -21,6 +21,7 @@
 
 from openhands.automation.backends.base import ExecutionBackend, ExecutionContext
 from openhands.automation.config import get_config
+from openhands.automation.exceptions import ConcurrencyLimitReachedError
 from openhands.automation.models import AutomationRun
 from openhands.automation.utils.api_key import get_api_key_for_automation_run
 from openhands.automation.utils.sandbox import (
@@ -52,6 +53,27 @@ def _is_auth_error(exc: BaseException) -> bool:
     return False
 
 
+def _concurrency_limit_detail(resp: httpx.Response) -> dict | None:
+    """Return the CONCURRENCY_LIMIT_REACHED detail dict if the response is the
+    organization concurrent-sandbox limit 429, else None.
+
+    The OpenHands API raises this as a FastAPI HTTPException, so the body is
+    ``{"detail": {"error": "CONCURRENCY_LIMIT_REACHED", ...}}``; we also tolerate
+    a flat ``{"error": ...}`` shape. This is distinct from a transient
+    rate-limit 429, which should still be retried (see ``_is_rate_limit_error``).
+    """
+    if resp.status_code != 429:
+        return None
+    try:
+        body = resp.json()
+    except Exception:
+        return None
+    detail = body.get("detail", body) if isinstance(body, dict) else None
+    if isinstance(detail, dict) and detail.get("error") == "CONCURRENCY_LIMIT_REACHED":
+        return detail
+    return None
+
+
 class CloudSandboxBackend(ExecutionBackend):
     """Execution backend that creates Cloud sandboxes per run.
 
@@ -269,6 +291,14 @@ async def _do_create():
             resp = await client.post(
                 f"{self.api_url}/api/v1/sandboxes", headers=headers
             )
+            # A concurrency-limit 429 is not transient: retrying won't free a
+            # slot. Raise a non-HTTPStatusError so the retry predicate skips it
+            # and the dispatcher can mark the run SKIPPED instead of FAILED.
+            detail = _concurrency_limit_detail(resp)
+            if detail is not None:
+                raise ConcurrencyLimitReachedError(
+                    detail.get("message") or "Concurrency limit reached"
+                )
             resp.raise_for_status()
             return resp.json()["id"]
 

diff --git a/openhands/automation/dispatcher.py b/openhands/automation/dispatcher.py
@@ -29,7 +29,11 @@
 from openhands.automation.backends import get_backend
 from openhands.automation.config import ServiceSettings, get_config
 from openhands.automation.db import using_sqlite
-from openhands.automation.exceptions import PermanentDispatchError, TarballNotFoundError
+from openhands.automation.exceptions import (
+    ConcurrencyLimitReachedError,
+    PermanentDispatchError,
+    TarballNotFoundError,
+)
 from openhands.automation.execution import execute_in_context
 from openhands.automation.models import (
     Automation,
@@ -196,6 +200,14 @@ async def _fail(error: str, disable: bool = False) -> None:
     # Note: This also initializes backend state (e.g., API key for cloud mode)
     try:
         ctx = await backend.get_execution_context(client)
+    except ConcurrencyLimitReachedError as exc:
+        logger.warning(
+            "Run skipped — organization concurrency limit reached: %s",
+            exc,
+            extra=_log_ctx(),
+        )
+        await mark_run_terminal(session_factory, run, AutomationRunStatus.SKIPPED)
+        return
     except Exception:
         logger.exception("Failed to get execution context", extra=_log_ctx())
         await _fail("Failed to get execution context")

diff --git a/openhands/automation/exceptions.py b/openhands/automation/exceptions.py
@@ -27,3 +27,14 @@ class TarballNotFoundError(PermanentDispatchError):
     """
 
     pass
+
+
+class ConcurrencyLimitReachedError(Exception):
+    """The organization/workspace has reached its concurrent-sandbox limit.
+
+    Unlike PermanentDispatchError this is a *transient*, org-level condition: a
+    later run may succeed once a concurrent slot frees up. The run is marked
+    SKIPPED (not FAILED) and the automation is left enabled.
+    """
+
+    pass
diff --git a/openhands/automation/models.py b/openhands/automation/models.py
@@ -41,6 +41,7 @@ class AutomationRunStatus(enum.Enum):
     COMPLETED = "COMPLETED"
     FAILED = "FAILED"
     CANCELLED = "CANCELLED"
+    SKIPPED = "SKIPPED"
 
 
 class Automation(Base):

diff --git a/openhands/automation/schemas.py b/openhands/automation/schemas.py
@@ -211,6 +211,7 @@ class RunStatus(StrEnum):
     COMPLETED = "COMPLETED"
     FAILED = "FAILED"
     CANCELLED = "CANCELLED"
+    SKIPPED = "SKIPPED"
 
 
 def _validate_command_string(

diff --git a/openhands/automation/utils/run.py b/openhands/automation/utils/run.py
@@ -149,6 +149,7 @@ async def mark_run_status(
         AutomationRunStatus.COMPLETED,
         AutomationRunStatus.FAILED,
         AutomationRunStatus.CANCELLED,
+        AutomationRunStatus.SKIPPED,
     ):
         values["completed_at"] = now
         run.completed_at = now

diff --git a/tests/test_backends.py b/tests/test_backends.py
@@ -2,6 +2,7 @@
 
 from unittest.mock import AsyncMock, MagicMock, patch
 
+import httpx
 import pytest
 
 from openhands.automation.backends import (
@@ -10,6 +11,8 @@
     LocalAgentServerBackend,
     get_backend,
 )
+from openhands.automation.backends.cloud import _concurrency_limit_detail
+from openhands.automation.exceptions import ConcurrencyLimitReachedError
 
 
 class TestExecutionContext:
@@ -469,3 +472,101 @@ def test_cloud_mode(self, monkeypatch, mock_run):
         backend = get_backend(mock_run)
         assert isinstance(backend, CloudSandboxBackend)
         assert backend.api_url == "https://app.all-hands.dev"
+
+
+class TestConcurrencyLimitDetection:
+    """Tests for `_concurrency_limit_detail`, the discriminator that tells an
+    organization concurrency-limit 429 (→ mark run SKIPPED) apart from a
+    transient rate-limit 429 (→ retry as before)."""
+
+    @staticmethod
+    def _resp(status: int, *, json=None, raw: bytes | None = None) -> httpx.Response:
+        req = httpx.Request("POST", "https://app.all-hands.dev/api/v1/sandboxes")
+        if raw is not None:
+            return httpx.Response(status, request=req, content=raw)
+        return httpx.Response(status, request=req, json=json)
+
+    def test_detects_nested_fastapi_detail(self):
+        """The real shape: FastAPI nests the HTTPException detail under "detail"."""
+        resp = self._resp(
+            429,
+            json={
+                "detail": {
+                    "error": "CONCURRENCY_LIMIT_REACHED",
+                    "message": "You have reached your limit of 3 ...",
+                    "limit": 3,
+                    "current": 3,
+                }
+            },
+        )
+        detail = _concurrency_limit_detail(resp)
+        assert detail is not None
+        assert detail["limit"] == 3
+
+    def test_detects_flat_detail(self):
+        """A non-nested {"error": ...} body is also tolerated."""
+        resp = self._resp(429, json={"error": "CONCURRENCY_LIMIT_REACHED"})
+        assert _concurrency_limit_detail(resp) is not None
+
+    def test_ignores_transient_rate_limit_429(self):
+        """A generic 429 with a string detail is a transient rate limit."""
+        resp = self._resp(429, json={"detail": "Rate limited, slow down"})
+        assert _concurrency_limit_detail(resp) is None
+
+    def test_ignores_429_without_marker(self):
+        """A 429 whose detail lacks the marker is not a concurrency limit."""
+        resp = self._resp(429, json={"detail": {"error": "SOMETHING_ELSE"}})
+        assert _concurrency_limit_detail(resp) is None
+
+    def test_ignores_non_json_429(self):
+        """A non-JSON 429 body never matches (and does not raise)."""
+        resp = self._resp(429, raw=b"<html>too many requests</html>")
+        assert _concurrency_limit_detail(resp) is None
+
+    def test_ignores_non_429(self):
+        """Only 429 responses can be a concurrency limit."""
+        resp = self._resp(200, json={"id": "sandbox-abc"})
+        assert _concurrency_limit_detail(resp) is None
+
+
+class TestCloudSandboxConcurrencyLimit:
+    """Tests that `_create_sandbox` surfaces the org concurrency limit as
+    `ConcurrencyLimitReachedError` without retrying it."""
+
+    @pytest.fixture
+    def mock_run(self):
+        run = MagicMock()
+        run.sandbox_id = None
+        run.keep_alive = False
+        run.bash_command_id = None
+        return run
+
+    @pytest.mark.asyncio
+    async def test_create_sandbox_raises_and_does_not_retry(self, mock_run):
+        """A concurrency-limit 429 raises ConcurrencyLimitReachedError on the
+        first attempt — retrying cannot free a slot, so it must not be retried."""
+        backend = CloudSandboxBackend(api_url="https://app.all-hands.dev", run=mock_run)
+
+        req = httpx.Request("POST", "https://app.all-hands.dev/api/v1/sandboxes")
+        resp = httpx.Response(
+            429,
+            request=req,
+            json={
+                "detail": {
+                    "error": "CONCURRENCY_LIMIT_REACHED",
+                    "message": "Reached limit of 3 concurrent conversations.",
+                    "limit": 3,
+                    "current": 3,
+                }
+            },
+        )
+        client = MagicMock()
+        client.post = AsyncMock(return_value=resp)
+
+        with pytest.raises(
+            ConcurrencyLimitReachedError, match="concurrent conversations"
+        ):
+            await backend._create_sandbox(client, {"Authorization": "Bearer x"})
+
+        # No retry: the sandbox API was hit exactly once.
+        assert client.post.await_count == 1