diff --git a/browser-use-python/src/browser_use_sdk/v3/client.py b/browser-use-python/src/browser_use_sdk/v3/client.py index 33615689..4cd727cc 100644 --- a/browser-use-python/src/browser_use_sdk/v3/client.py +++ b/browser-use-python/src/browser_use_sdk/v3/client.py @@ -1,7 +1,6 @@ from __future__ import annotations import os -from collections.abc import Awaitable from typing import Any, TypeVar, overload from uuid import UUID @@ -63,6 +62,7 @@ def run( workspace_id: str | None = ..., enable_recording: bool | None = ..., cache_script: bool | None = ..., + auto_heal: bool | None = ..., **extra: Any, ) -> SessionResult[T]: ... @@ -81,6 +81,7 @@ def run( workspace_id: str | None = ..., enable_recording: bool | None = ..., cache_script: bool | None = ..., + auto_heal: bool | None = ..., **extra: Any, ) -> SessionResult[T]: ... @@ -98,6 +99,7 @@ def run( workspace_id: str | None = ..., enable_recording: bool | None = ..., cache_script: bool | None = ..., + auto_heal: bool | None = ..., **extra: Any, ) -> SessionResult[str]: ... @@ -116,6 +118,7 @@ def run( workspace_id: str | None = None, enable_recording: bool | None = None, cache_script: bool | None = None, + auto_heal: bool | None = None, **extra: Any, ) -> Any: """Run a task and block until complete. Returns a SessionResult. @@ -127,7 +130,9 @@ def run( - False: force-disable caching. When active, the first call runs the full agent and saves a reusable script. - Subsequent calls with the same task template execute the script with $0 LLM cost. + Subsequent calls with the same task template execute the script. By default, + auto_heal may use lightweight validation or regenerate the script if output + looks wrong; set auto_heal=False to return the raw script output. 
""" if cache_script is True and not workspace_id: raise ValueError("workspace_id is required when cache_script=True") @@ -158,6 +163,7 @@ def run( workspace_id=workspace_id, enable_recording=enable_recording, cache_script=cache_script, + auto_heal=auto_heal, **extra, ) return _poll_output(self.sessions, str(data.id), resolved_schema) @@ -177,6 +183,7 @@ def stream( workspace_id: str | None = None, enable_recording: bool | None = None, cache_script: bool | None = None, + auto_heal: bool | None = None, **extra: Any, ) -> SessionStream[Any]: """Run a task and yield messages as they happen. @@ -224,9 +231,12 @@ def stream( workspace_id=workspace_id, enable_recording=enable_recording, cache_script=cache_script, + auto_heal=auto_heal, **extra, ) - return SessionStream(data, self.sessions, resolved_schema, _start_cursor=start_cursor) + return SessionStream( + data, self.sessions, resolved_schema, _start_cursor=start_cursor + ) def close(self) -> None: """Close the underlying HTTP client.""" @@ -280,6 +290,7 @@ def run( workspace_id: str | None = ..., enable_recording: bool | None = ..., cache_script: bool | None = ..., + auto_heal: bool | None = ..., **extra: Any, ) -> AsyncSessionRun[T]: ... @@ -298,6 +309,7 @@ def run( workspace_id: str | None = ..., enable_recording: bool | None = ..., cache_script: bool | None = ..., + auto_heal: bool | None = ..., **extra: Any, ) -> AsyncSessionRun[T]: ... @@ -315,6 +327,7 @@ def run( workspace_id: str | None = ..., enable_recording: bool | None = ..., cache_script: bool | None = ..., + auto_heal: bool | None = ..., **extra: Any, ) -> AsyncSessionRun[str]: ... @@ -333,6 +346,7 @@ def run( workspace_id: str | None = None, enable_recording: bool | None = None, cache_script: bool | None = None, + auto_heal: bool | None = None, **extra: Any, ) -> AsyncSessionRun[Any]: """Run a task. Await the result for a SessionResult. @@ -344,7 +358,9 @@ def run( - False: force-disable caching. 
When active, the first call runs the full agent and saves a reusable script. - Subsequent calls with the same task template execute the script with $0 LLM cost. + Subsequent calls with the same task template execute the script. By default, + auto_heal may use lightweight validation or regenerate the script if output + looks wrong; set auto_heal=False to return the raw script output. """ if cache_script is True and not workspace_id: raise ValueError("workspace_id is required when cache_script=True") @@ -386,10 +402,16 @@ async def create_fn() -> SessionResponse: workspace_id=workspace_id, enable_recording=enable_recording, cache_script=cache_script, + auto_heal=auto_heal, **extra, ) - return AsyncSessionRun(create_fn, self.sessions, resolved_schema, _start_cursor_ref=lambda: start_cursor) + return AsyncSessionRun( + create_fn, + self.sessions, + resolved_schema, + _start_cursor_ref=lambda: start_cursor, + ) async def close(self) -> None: """Close the underlying HTTP client.""" diff --git a/browser-use-python/src/browser_use_sdk/v3/resources/sessions.py b/browser-use-python/src/browser_use_sdk/v3/resources/sessions.py index 99964fec..4d11678d 100644 --- a/browser-use-python/src/browser_use_sdk/v3/resources/sessions.py +++ b/browser-use-python/src/browser_use_sdk/v3/resources/sessions.py @@ -35,6 +35,7 @@ def create( enable_scheduled_tasks: bool | None = None, enable_recording: bool | None = None, cache_script: bool | None = None, + auto_heal: bool | None = None, **extra: Any, ) -> SessionResponse: """Create a session and optionally dispatch a task.""" @@ -52,7 +53,11 @@ def create( if profile_id is not None: body["profileId"] = profile_id if proxy_country_code is not _UNSET: - body["proxyCountryCode"] = proxy_country_code.lower() if isinstance(proxy_country_code, str) else proxy_country_code + body["proxyCountryCode"] = ( + proxy_country_code.lower() + if isinstance(proxy_country_code, str) + else proxy_country_code + ) if output_schema is not None: 
body["outputSchema"] = output_schema if workspace_id is not None: @@ -63,6 +68,8 @@ def create( body["enableRecording"] = enable_recording if cache_script is not None: body["cacheScript"] = cache_script + if auto_heal is not None: + body["autoHeal"] = auto_heal body.update(extra) return SessionResponse.model_validate( self._http.request("POST", "/sessions", json=body) @@ -92,7 +99,9 @@ def get(self, session_id: str | UUID) -> SessionResponse: self._http.request("GET", f"/sessions/{session_id}") ) - def stop(self, session_id: str | UUID, *, strategy: str | None = None, **extra: Any) -> SessionResponse: + def stop( + self, session_id: str | UUID, *, strategy: str | None = None, **extra: Any + ) -> SessionResponse: """Stop a session or the running task.""" body: dict[str, Any] | None = None if strategy is not None or extra: @@ -172,6 +181,7 @@ async def create( enable_scheduled_tasks: bool | None = None, enable_recording: bool | None = None, cache_script: bool | None = None, + auto_heal: bool | None = None, **extra: Any, ) -> SessionResponse: """Create a session and optionally dispatch a task.""" @@ -189,7 +199,11 @@ async def create( if profile_id is not None: body["profileId"] = profile_id if proxy_country_code is not _UNSET: - body["proxyCountryCode"] = proxy_country_code.lower() if isinstance(proxy_country_code, str) else proxy_country_code + body["proxyCountryCode"] = ( + proxy_country_code.lower() + if isinstance(proxy_country_code, str) + else proxy_country_code + ) if output_schema is not None: body["outputSchema"] = output_schema if workspace_id is not None: @@ -200,6 +214,8 @@ async def create( body["enableRecording"] = enable_recording if cache_script is not None: body["cacheScript"] = cache_script + if auto_heal is not None: + body["autoHeal"] = auto_heal body.update(extra) return SessionResponse.model_validate( await self._http.request("POST", "/sessions", json=body) @@ -229,7 +245,9 @@ async def get(self, session_id: str | UUID) -> SessionResponse: await 
self._http.request("GET", f"/sessions/{session_id}") ) - async def stop(self, session_id: str | UUID, *, strategy: str | None = None, **extra: Any) -> SessionResponse: + async def stop( + self, session_id: str | UUID, *, strategy: str | None = None, **extra: Any + ) -> SessionResponse: """Stop a session or the running task.""" body: dict[str, Any] | None = None if strategy is not None or extra: diff --git a/browser-use-python/tests/test_vibe.py b/browser-use-python/tests/test_vibe.py index ef8aa08d..fcbcd93a 100644 --- a/browser-use-python/tests/test_vibe.py +++ b/browser-use-python/tests/test_vibe.py @@ -8,6 +8,7 @@ import inspect import json +import os import typing from pathlib import Path from typing import Any, Dict, List, Set, Tuple @@ -17,10 +18,15 @@ # --------------------------------------------------------------------------- # Locate spec files via CLOUD_REPO_PATH in .env # --------------------------------------------------------------------------- -_SDK_REPO = Path(__file__).resolve().parents[2] # browser-use-python/tests -> sdk repo root +_SDK_REPO = ( + Path(__file__).resolve().parents[2] +) # browser-use-python/tests -> sdk repo root def _get_cloud_repo_path() -> Path: + if env_path := os.environ.get("CLOUD_REPO_PATH"): + return Path(env_path) + env_file = _SDK_REPO / ".env" for line in env_file.read_text().splitlines(): line = line.strip() @@ -89,7 +95,10 @@ def _load_spec(path: Path) -> Dict[str, Any]: ("post", "/skills/{skill_id}/refine"): ("skills", "refine"), ("post", "/skills/{skill_id}/rollback"): ("skills", "rollback"), ("get", "/skills/{skill_id}/executions"): ("skills", "executions"), - ("get", "/skills/{skill_id}/executions/{execution_id}/output"): ("skills", "execution_output"), + ("get", "/skills/{skill_id}/executions/{execution_id}/output"): ( + "skills", + "execution_output", + ), # marketplace ("get", "/marketplace/skills"): ("marketplace", "list"), ("get", "/marketplace/skills/{skill_slug}"): ("marketplace", "get"), @@ -158,10 +167,6 
@@ def test_all_spec_endpoints_mapped(self) -> None: assert not missing, f"Unmapped v2 endpoints: {missing}" def test_sdk_methods_exist(self) -> None: - from browser_use_sdk.v2.client import BrowserUse - - client = BrowserUse.__new__(BrowserUse) - # Manually set up resource stubs so we can inspect from browser_use_sdk.v2 import resources for resource_attr, method_name in _V2_MAP.values(): @@ -181,9 +186,7 @@ def test_sdk_methods_exist(self) -> None: f"{cls.__name__} missing method '{method_name}'" ) method = getattr(cls, method_name) - assert callable(method), ( - f"{cls.__name__}.{method_name} is not callable" - ) + assert callable(method), f"{cls.__name__}.{method_name} is not callable" def test_async_sdk_methods_exist(self) -> None: from browser_use_sdk.v2 import resources @@ -221,7 +224,13 @@ def test_all_spec_endpoints_mapped(self) -> None: assert not missing, f"Unmapped v3 endpoints: {missing}" def test_sdk_methods_exist(self) -> None: - from browser_use_sdk.v3.resources import billing, browsers, profiles, sessions, workspaces + from browser_use_sdk.v3.resources import ( + billing, + browsers, + profiles, + sessions, + workspaces, + ) resource_classes = { "billing": billing.Billing, @@ -237,7 +246,13 @@ def test_sdk_methods_exist(self) -> None: ) def test_async_sdk_methods_exist(self) -> None: - from browser_use_sdk.v3.resources import billing, browsers, profiles, sessions, workspaces + from browser_use_sdk.v3.resources import ( + billing, + browsers, + profiles, + sessions, + workspaces, + ) async_classes = { "billing": billing.AsyncBilling, @@ -302,11 +317,7 @@ def _load(self) -> None: def _get_query_params(self, method: str, path: str) -> Set[str]: op = self.spec["paths"].get(path, {}).get(method, {}) - return { - p["name"] - for p in op.get("parameters", []) - if p.get("in") == "query" - } + return {p["name"] for p in op.get("parameters", []) if p.get("in") == "query"} def _resolve_ref(self, ref: str) -> Dict[str, Any]: parts = ref.lstrip("#/").split("/") 
@@ -408,10 +419,15 @@ def test_task_action_variants(self) -> None: if not method_name: missing.append(f"No SDK method mapping for action '{action}'") continue - for label, classes_fn in [("sync", _get_resource_classes), ("async", _get_async_resource_classes)]: + for label, classes_fn in [ + ("sync", _get_resource_classes), + ("async", _get_async_resource_classes), + ]: cls = classes_fn()["tasks"] if not hasattr(cls, method_name): - missing.append(f"{cls.__name__} missing '{method_name}' for action '{action}'") + missing.append( + f"{cls.__name__} missing '{method_name}' for action '{action}'" + ) assert not missing, "Missing action variants:\n" + "\n".join(missing) @@ -435,8 +451,14 @@ def test_v2_resources_attached(self) -> None: client = BrowserUse(api_key="test-key") expected = [ - "billing", "tasks", "sessions", "files", - "profiles", "browsers", "skills", "marketplace", + "billing", + "tasks", + "sessions", + "files", + "profiles", + "browsers", + "skills", + "marketplace", ] for attr in expected: assert hasattr(client, attr), f"BrowserUse missing .{attr}" @@ -449,3 +471,35 @@ def test_v3_resources_attached(self) -> None: assert hasattr(client, "sessions") assert hasattr(client, "workspaces") client.close() + + +class TestV3SessionPayloads: + def test_sessions_create_serializes_auto_heal(self) -> None: + from browser_use_sdk.v3.resources.sessions import Sessions + + class FakeHttp: + def __init__(self) -> None: + self.calls: list[dict[str, Any]] = [] + + def request(self, method: str, path: str, *, json=None, params=None): + self.calls.append( + {"method": method, "path": path, "json": json, "params": params} + ) + return { + "id": "00000000-0000-0000-0000-000000000001", + "status": "created", + "model": "gemini-3-flash", + "createdAt": "2026-05-26T00:00:00Z", + "updatedAt": "2026-05-26T00:00:00Z", + } + + http = FakeHttp() + Sessions(http).create( + "Fetch https://httpbin.org/anything?item=@{{alpha}}", + workspace_id="00000000-0000-0000-0000-000000000002", + 
cache_script=True, + auto_heal=False, + ) + + assert http.calls[0]["json"]["cacheScript"] is True + assert http.calls[0]["json"]["autoHeal"] is False diff --git a/docs/cloud/agent/cache-script.mdx b/docs/cloud/agent/cache-script.mdx index 6f850920..315a6dee 100644 --- a/docs/cloud/agent/cache-script.mdx +++ b/docs/cloud/agent/cache-script.mdx @@ -1,14 +1,16 @@ --- title: Deterministic rerun -description: "Run a task once, then re-execute it for $0 LLM cost." +description: "Run a task once, then rerun it with new parameters from a cached script." icon: bolt --- -Deterministic rerun lets you run a browser task once with a full agent, then **re-execute the same task instantly** using a cached script — no LLM, up to 99% cheaper. +Deterministic rerun lets you run a task once with the full agent, save the learned workflow as a Python script, then **rerun that script with new parameters**. Cache hits skip the full agent and can drop LLM cost to zero when the script succeeds without auto-healing or explicit LLM calls. + +The cached script still fetches the website or API again. It does not replay the first run's output, so updated page/API content is reflected on reruns. ## Quick start -Use `@{{double brackets}}` around values that can change between runs. The first call runs the full agent. Every subsequent call with the same template uses the cached script. +Use `@{{double brackets}}` around values that can change between runs. The first call runs the full agent and creates a script in your workspace. Every subsequent call with the same template runs that script with the new parameter values. 
```python Python @@ -17,16 +19,17 @@ from browser_use_sdk.v3 import AsyncBrowserUse client = AsyncBrowserUse() workspace = await client.workspaces.create(name="my-scraper") -# First call — agent explores, creates script (~$0.10, ~60s) +# First call: agent explores, creates scripts/<hash>.py result = await client.run( - "Get the top @{{5}} stories from https://news.ycombinator.com as JSON", + "Fetch https://httpbin.org/anything?item=@{{alpha}} and return args.item, url, and response date as JSON", workspace_id=str(workspace.id), ) -# Second call — cached script, different param ($0 LLM, ~5s) +# Second call: same template, new parameter, cached script path result2 = await client.run( - "Get the top @{{10}} stories from https://news.ycombinator.com as JSON", + "Fetch https://httpbin.org/anything?item=@{{beta}} and return args.item, url, and response date as JSON", workspace_id=str(workspace.id), + auto_heal=False, # optional: disables validation/healing LLM calls ) ``` ```typescript TypeScript @@ -35,20 +38,22 @@ import { BrowserUse } from "browser-use-sdk/v3"; const client = new BrowserUse(); const workspace = await client.workspaces.create({ name: "my-scraper" }); -// First call — agent explores, creates script (~$0.10, ~60s) +// First call: agent explores, creates scripts/<hash>.py const result = await client.run( - "Get the top @{{5}} stories from https://news.ycombinator.com as JSON", + "Fetch https://httpbin.org/anything?item=@{{alpha}} and return args.item, url, and response date as JSON", { workspaceId: workspace.id }, ); -// Second call — cached script, different param ($0 LLM, ~5s) +// Second call: same template, new parameter, cached script path const result2 = await client.run( - "Get the top @{{10}} stories from https://news.ycombinator.com as JSON", - { workspaceId: workspace.id }, + "Fetch https://httpbin.org/anything?item=@{{beta}} and return args.item, url, and response date as JSON", + { workspaceId: workspace.id, autoHeal: false }, ); ``` +On the cached run, 
`result2.output` comes from a new request to `httpbin.org` with `item=beta`. In live testing this path returned `0` input tokens, `0` output tokens, and `llmCostUsd: 0`. + ## How it works @@ -65,17 +70,22 @@ const result2 = await client.run( The system strips the values to create a **template**: `"Get prices from @{{}} for @{{}}"`. - Template `"Get prices from @{{}} for @{{}}"` is hashed to a unique ID like `a7f3b2c1`. - The system checks the workspace for `scripts/a7f3b2c1.py`. + Template `"Get prices from @{{}} for @{{}}"` is normalized, hashed, and stored as a 16-character script filename like `scripts/a7f3b2c19d4e8f01.py`. + + The cache key ignores parameter values. It is based on the template after values are replaced with `@{{}}`, whitespace is collapsed, and the template is lowercased. - If no script exists, the full agent runs your task. After completing it, the agent saves a standalone Python script that reproduces the result deterministically — no AI needed. + If no script exists, the full agent runs your task. After completing it, the agent saves a standalone Python script that can reproduce the workflow with new parameters. - If the script exists, it runs directly with the new parameter values. No agent, no LLM. Just the script in a sandbox with browser and proxy. + If the script exists, it runs directly with the new parameter values. This skips the full browser agent. The script can use a browser, or it can use Browser Use's `fetch` helper for direct HTTP requests when a browser is unnecessary. + +Keep each `@{{...}}` value on one line, and do not put `}}` inside a parameter value. Changing only parameter values reuses the script; changing the wording or the number/order of parameters creates a different script. + + ## Auto-detection Caching activates **automatically** when both conditions are met: @@ -90,11 +100,15 @@ No extra flags needed. 
You can override with `cache_script`: | `True` | Force-enable, even without brackets | | `False` | Force-disable, even if brackets are present | + +`cacheScript` applies to create-and-run calls. Follow-up dispatches to an existing `sessionId` currently run as normal agent tasks, even if the task contains `@{{...}}`. + + ## Examples ### Parameterized scraping -Run once, then loop over different keywords at $0 LLM each: +Run once, then loop over different keywords. Cache hits skip the full agent: ```python Python @@ -104,7 +118,7 @@ result = await client.run( workspace_id=str(workspace.id), ) -# Instant reruns with different keywords +# Reruns with different keywords use the cached script for keyword in ["CEO", "marketing", "finance", "e-commerce"]: result = await client.run( f"Go to @{{{{https://intro.co/marketplace}}}} and get all @{{{{{keyword}}}}} experts as JSON", @@ -130,6 +144,76 @@ for (const keyword of ["CEO", "marketing", "finance", "e-commerce"]) { ``` +### Fresh content on rerun + +The cached script fetches the source again. It is useful for extraction jobs where the target page or API changes over time. 
+ + +```python Python +result = await client.run( + "Fetch https://httpbin.org/anything?mode=@{{first}}&value=@{{red}} " + "and return JSON with mode, value, url, and response date", + workspace_id=str(workspace.id), + auto_heal=False, +) + +result2 = await client.run( + "Fetch https://httpbin.org/anything?mode=@{{second}}&value=@{{blue}} " + "and return JSON with mode, value, url, and response date", + workspace_id=str(workspace.id), + auto_heal=False, +) + +print(result2.output["mode"]) # "second" +print(result2.llm_cost_usd) # "0" when the cached script succeeds +``` +```typescript TypeScript +const result = await client.run( + "Fetch https://httpbin.org/anything?mode=@{{first}}&value=@{{red}} " + + "and return JSON with mode, value, url, and response date", + { workspaceId: workspace.id, autoHeal: false }, +); + +const result2 = await client.run( + "Fetch https://httpbin.org/anything?mode=@{{second}}&value=@{{blue}} " + + "and return JSON with mode, value, url, and response date", + { workspaceId: workspace.id, autoHeal: false }, +); + +console.log(result2.output.mode); // "second" +console.log(result2.llmCostUsd); // "0" when the cached script succeeds +``` + + +For browser-required pages, parameterize the URL or search terms: + + +```python Python +result = await client.run( + "Go to @{{https://example.com/}} in the browser. Return the final URL, title, H1, and first paragraph as JSON.", + workspace_id=str(workspace.id), +) + +result2 = await client.run( + "Go to @{{https://www.iana.org/domains/reserved}} in the browser. Return the final URL, title, H1, and first paragraph as JSON.", + workspace_id=str(workspace.id), +) +``` +```typescript TypeScript +const result = await client.run( + "Go to @{{https://example.com/}} in the browser. Return the final URL, title, H1, and first paragraph as JSON.", + { workspaceId: workspace.id }, +); + +const result2 = await client.run( + "Go to @{{https://www.iana.org/domains/reserved}} in the browser. 
Return the final URL, title, H1, and first paragraph as JSON.", + { workspaceId: workspace.id }, +); +``` + + +The second run returns content from the new URL, not a replay of the first page. + ### No parameters — cache the exact task Append empty brackets `@{{}}` to signal "cache this exact task": @@ -234,7 +318,7 @@ for f in files.files: print(f"{f.path} ({f.size} bytes)") # Download a script to inspect it -await client.workspaces.download(workspace.id, "scripts/a7f3b2c1.py", to="./my_script.py") +await client.workspaces.download(workspace.id, "scripts/a7f3b2c19d4e8f01.py", to="./my_script.py") ``` ```typescript TypeScript const files = await client.workspaces.files(workspace.id, { prefix: "scripts/" }); @@ -258,23 +342,52 @@ When a cached script runs, the system validates its output: Auto-healing is **limited to 1 attempt per run** to prevent runaway costs. If the healed script also fails, the output is returned as-is. +### Disable auto-healing + +If you want the cached path to return the raw script output and avoid validation/healing LLM calls, disable auto-healing: + + +```python Python +result = await client.run( + "Fetch https://httpbin.org/anything?item=@{{alpha}} and return JSON", + workspace_id=str(workspace.id), + auto_heal=False, +) +``` +```typescript TypeScript +const result = await client.run( + "Fetch https://httpbin.org/anything?item=@{{alpha}} and return JSON", + { workspaceId: workspace.id, autoHeal: false }, +); +``` + + ### Cost impact -| Scenario | LLM cost | -|----------|----------| -| Cached script succeeds | **$0** | -| Cached script fails, auto-heals | ~$0.05–1.00 (one full agent run) | -| Healed script also fails | Same as above (returns best-effort output) | +| Scenario | Full agent LLM | Other LLM | +|----------|----------------|-----------| +| Cache hit, `autoHeal: false`, script does not call LLM helpers | **$0** | **$0** | +| Cache hit, default `autoHeal: true` | **$0** | Lightweight validation may run | +| Cache hit, script calls an 
LLM helper | **$0** | Per-call LLM cost | +| Cache hit fails and auto-heals | Full agent cost | Validation cost | + +Auto-healing is enabled by default for all cached scripts. + +## Browser, fetch, and recordings + +Cached scripts do not always use a browser. During the first run, the agent may discover that the fastest reliable implementation is a direct HTTP request with Browser Use's `fetch` helper instead of browser navigation. That is expected for JSON APIs and many data-extraction tasks. + +If you pass `enableRecording: true`, `recordingUrls` are returned only when a browser session actually records and the MP4 is ready. Direct-HTTP cached runs may have no recording because there was no useful browser interaction to record. -Auto-healing is enabled by default for all cached scripts. No configuration needed. +On cache hits, `isTaskSuccessful` can be `null` even when `status` is `stopped` and `output` is correct, because the full agent success judge was skipped. For deterministic reruns, treat the returned output and your own validation/schema checks as the source of truth. ## Cost comparison -| | LLM cost | Browser + proxy | Time | +| | Full agent LLM | Browser/proxy | Typical time | |---|---|---|---| -| First call (agent) | ~$0.05–1.00 | Yes | ~30–120s | -| Cached calls | **$0** | Yes | ~3–10s | +| First call (agent) | ~$0.05–1.00 | If needed | ~30–120s | +| Cached call | **$0** unless auto-heal regenerates | If needed | ~3–15s | -The browser and proxy still run for cached calls (the script may need them), so there is a small infrastructure cost per execution. LLM cost drops to zero. +Cached calls can still have small infrastructure cost. If the script uses direct HTTP only, there may be no browser recording even with recording enabled. 
diff --git a/docs/cloud/openapi/v3.json b/docs/cloud/openapi/v3.json index a13b23de..71869221 100644 --- a/docs/cloud/openapi/v3.json +++ b/docs/cloud/openapi/v3.json @@ -3027,6 +3027,12 @@ ], "title": "Cachescript", "description": "Controls deterministic script caching. `null` (default): auto-detected \u2014 enabled when the task contains `@{{value}}` brackets and a workspace is attached. `true`: force-enable script caching even without brackets (caches the exact task). `false`: force-disable, even if brackets are present. When active, the first call runs the full agent and saves a reusable script. Subsequent calls with the same task template execute the cached script with $0 LLM cost. Requires workspace_id when enabled. Example: \"Get prices from @{{https://example.com}} for @{{electronics}}\"." + }, + "autoHeal": { + "type": "boolean", + "title": "Autoheal", + "description": "When cache_script is active, controls whether a lightweight LLM validates the cached script output. If the output looks incorrect (empty, error, wrong structure), the system automatically re-triggers the full agent to generate a new version of the script. Set to false to disable validation and always return the raw script output.", + "default": true } }, "type": "object", diff --git a/docs/cloud/tutorials/grow-therapy-compare.mdx b/docs/cloud/tutorials/grow-therapy-compare.mdx index 2138e6f3..e2ec23c6 100644 --- a/docs/cloud/tutorials/grow-therapy-compare.mdx +++ b/docs/cloud/tutorials/grow-therapy-compare.mdx @@ -85,8 +85,8 @@ const workspace = await client.workspaces.create({ name: "grow-therapy-search" } ```python Python result = await client.run( - "Go to growtherapy.com and search for therapists in {{New York}} " - "who specialize in {{anxiety}} and accept insurance. " + "Go to growtherapy.com and search for therapists in @{{New York}} " + "who specialize in @{{anxiety}} and accept insurance. 
" "Return the first 5 provider profiles as JSON.", workspace_id=str(workspace.id), output_schema=ProviderSearch, @@ -101,8 +101,8 @@ for p in result.output.providers: ``` ```typescript TypeScript const result = await client.run( - "Go to growtherapy.com and search for therapists in {{New York}} " + - "who specialize in {{anxiety}} and accept insurance. " + + "Go to growtherapy.com and search for therapists in @{{New York}} " + + "who specialize in @{{anxiety}} and accept insurance. " + "Return the first 5 provider profiles as JSON.", { workspaceId: workspace.id, schema: ProviderSearch }, ); @@ -120,6 +120,11 @@ for (const p of result.output.providers) { After the first run caches the search flow, sweep across different parameters at $0 LLM cost: + +Cache parameters must include the `@`: use `@{{value}}`, not `{{value}}`. +Plain `{{value}}` is treated as normal task text and will not trigger deterministic rerun. + + ```python Python locations = ["Los Angeles", "Chicago", "Houston", "Miami"] @@ -128,8 +133,8 @@ specialties = ["depression", "trauma", "ADHD"] for location in locations: for specialty in specialties: result = await client.run( - f"Go to growtherapy.com and search for therapists in {{{{{location}}}}} " - f"who specialize in {{{{{specialty}}}}} and accept insurance. " + f"Go to growtherapy.com and search for therapists in @{{{{{location}}}}} " + f"who specialize in @{{{{{specialty}}}}} and accept insurance. " f"Return the first 5 provider profiles as JSON.", workspace_id=str(workspace.id), output_schema=ProviderSearch, @@ -144,8 +149,8 @@ const specialties = ["depression", "trauma", "ADHD"]; for (const location of locations) { for (const specialty of specialties) { const result = await client.run( - `Go to growtherapy.com and search for therapists in {{${location}}} ` + - `who specialize in {{${specialty}}} and accept insurance. 
` + + `Go to growtherapy.com and search for therapists in @{{${location}}} ` + + `who specialize in @{{${specialty}}} and accept insurance. ` + `Return the first 5 provider profiles as JSON.`, { workspaceId: workspace.id, schema: ProviderSearch }, );