From a6bc29560a25f250c77a789074cccc62173b1485 Mon Sep 17 00:00:00 2001
From: enyst <engel.nyst@gmail.com>
Date: Sat, 6 Jun 2026 14:04:57 +0000
Subject: [PATCH 1/4] docs: add OpenAI-compatible agent-server gateway guide

Co-authored-by: openhands <openhands@all-hands.dev>
---
 docs.json                                  |   1 +
 sdk/guides/agent-server/openai-gateway.mdx | 156 +++++++++++++++++++++
 2 files changed, 157 insertions(+)
 create mode 100644 sdk/guides/agent-server/openai-gateway.mdx
diff --git a/docs.json b/docs.json
index 59df76aa..d1e84995 100644
--- a/docs.json
+++ b/docs.json
@@ -303,6 +303,7 @@
                   "sdk/guides/agent-server/api-sandbox",
                   "sdk/guides/agent-server/cloud-workspace",
                   "sdk/guides/agent-server/custom-tools",
+                  "sdk/guides/agent-server/openai-gateway",
                   {
                     "group": "API Reference",
                     "openapi": {
diff --git a/sdk/guides/agent-server/openai-gateway.mdx b/sdk/guides/agent-server/openai-gateway.mdx
new file mode 100644
index 00000000..fcec83cf
--- /dev/null
+++ b/sdk/guides/agent-server/openai-gateway.mdx
@@ -0,0 +1,156 @@
+---
+title: OpenAI-Compatible Gateway
+description: Call an OpenHands agent-server through the OpenAI Chat Completions protocol.
+---
+
+import RunExampleCode from "/sdk/shared-snippets/how-to-run-example.mdx";
+
+The agent-server exposes an OpenAI-compatible `/v1/chat/completions` endpoint so clients that already speak the OpenAI protocol can call an OpenHands agent.
+
+Use this when you want an existing chat UI, IDE integration, evaluation harness, or another agent to treat OpenHands as an OpenAI-style backend while still getting the full agent runtime behind the request.
+
+## How it works
+
+1. Save an LLM profile through the agent-server profile API.
+2. List available gateway models with `GET /v1/models`.
+3. Call `POST /v1/chat/completions` with a model ID shaped like `openhands_<profile_name>`.
+4. Read the `X-OpenHands-ServerConversation-ID` header from the response.
+5. Pass that header back on later requests to continue the same OpenHands conversation.
+
+The gateway accepts the same session key in either OpenHands or OpenAI-compatible form:
+
+- `X-Session-API-Key: <key>`
+- `Authorization: Bearer <key>`
+
+## Ready-to-run example
+
+<Note>
+This example is available on GitHub: [examples/02_remote_agent_server/14_openai_compatible_gateway.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/14_openai_compatible_gateway.py)
+</Note>
+
+```python icon="python" expandable examples/02_remote_agent_server/14_openai_compatible_gateway.py
+"""Use the agent-server through an OpenAI-compatible Chat Completions client.
+
+This example starts a local agent-server, stores an LLM profile, lists it through
+``GET /v1/models``, then calls ``POST /v1/chat/completions`` with the OpenAI
+Python SDK. The returned ``X-OpenHands-ServerConversation-ID`` header is passed
+back on a second call to continue the same OpenHands conversation.
+"""
+
+import os
+from uuid import UUID
+
+import httpx
+from openai import OpenAI
+from scripts.utils import ManagedAPIServer
+
+
+api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
+assert api_key is not None, "Set LLM_API_KEY or OPENAI_API_KEY."
+
+llm_model = os.getenv("LLM_MODEL", "gpt-5-nano")
+llm_base_url = os.getenv("LLM_BASE_URL")
+profile_name = "gateway_demo"
+gateway_model = f"openhands_{profile_name}"
+
+with ManagedAPIServer(
+    port=8770,
+    use_session_api_key=True,
+    extra_env={
+        "OH_ENABLE_VNC": "0",
+        "OH_ENABLE_VSCODE": "0",
+        "OH_PRELOAD_TOOLS": "0",
+        "OH_SECRET_KEY": "example-secret-key-for-demo-only-32b",
+        "OH_WEBHOOKS": "[]",
+    },
+    health_request_timeout=2.0,
+) as server:
+    session_api_key = (
+        os.getenv("SESSION_API_KEY")
+        or os.getenv("OH_SESSION_API_KEYS_0")
+        or server.session_api_key
+    )
+    assert session_api_key is not None
+    api_client = httpx.Client(
+        base_url=server.base_url,
+        headers={"X-Session-API-Key": session_api_key},
+        timeout=120.0,
+    )
+    openai_client = OpenAI(
+        api_key=session_api_key,
+        base_url=f"{server.base_url}/v1",
+        timeout=120.0,
+    )
+
+    llm_config = {"model": llm_model, "api_key": api_key}
+    if llm_base_url:
+        llm_config["base_url"] = llm_base_url
+
+    profile_response = api_client.post(
+        f"/api/profiles/{profile_name}",
+        json={"llm": llm_config, "include_secrets": True},
+    )
+    assert profile_response.status_code == 201, profile_response.text
+
+    models = openai_client.models.list()
+    model_ids = [model.id for model in models.data]
+    assert gateway_model in model_ids
+    print(f"Gateway models include: {gateway_model}")
+
+    first_response = openai_client.chat.completions.with_raw_response.create(
+        model=gateway_model,
+        messages=[
+            {
+                "role": "system",
+                "content": "Answer directly and do not use tools.",
+            },
+            {
+                "role": "user",
+                "content": (
+                    "In one sentence, explain what an OpenAI-compatible "
+                    "agent-server gateway does."
+                ),
+            },
+        ],
+    )
+    first_completion = first_response.parse()
+    conversation_id = first_response.headers.get("X-OpenHands-ServerConversation-ID")
+    assert conversation_id is not None
+    UUID(conversation_id)
+
+    first_answer = first_completion.choices[0].message.content
+    print(f"First answer: {first_answer}")
+    print(f"OpenHands conversation ID: {conversation_id}")
+
+    persisted_response = api_client.get(f"/api/conversations/{conversation_id}")
+    assert persisted_response.status_code == 200, persisted_response.text
+
+    second_completion = openai_client.chat.completions.create(
+        model=gateway_model,
+        messages=[
+            {
+                "role": "user",
+                "content": "Now answer in five words or fewer: what did I ask about?",
+            }
+        ],
+        extra_headers={"X-OpenHands-ServerConversation-ID": conversation_id},
+    )
+    second_answer = second_completion.choices[0].message.content
+    print(f"Second answer using same conversation: {second_answer}")
+
+    conversation_response = api_client.get(f"/api/conversations/{conversation_id}")
+    assert conversation_response.status_code == 200, conversation_response.text
+    stats = conversation_response.json().get("stats") or {}
+    usage_to_metrics = stats.get("usage_to_metrics") or {}
+    accumulated_cost = sum(
+        metrics.get("accumulated_cost", 0.0) for metrics in usage_to_metrics.values()
+    )
+
+    api_client.delete(f"/api/conversations/{conversation_id}")
+    api_client.delete(f"/api/profiles/{profile_name}")
+    api_client.close()
+
+    print(f"EXAMPLE_COST: {accumulated_cost}")
+```
+
+<RunExampleCode path_to_script="examples/02_remote_agent_server/14_openai_compatible_gateway.py"/>

From 1fe5c9e9dee92c962efd6b6003afdac92a13df77 Mon Sep 17 00:00:00 2001
From: enyst <engel.nyst@gmail.com>
Date: Sat, 6 Jun 2026 14:16:27 +0000
Subject: [PATCH 2/4] docs: clarify OpenAI gateway example

Co-authored-by: openhands <openhands@all-hands.dev>
---
 sdk/guides/agent-server/openai-gateway.mdx | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/sdk/guides/agent-server/openai-gateway.mdx b/sdk/guides/agent-server/openai-gateway.mdx
index fcec83cf..ea4850cd 100644
--- a/sdk/guides/agent-server/openai-gateway.mdx
+++ b/sdk/guides/agent-server/openai-gateway.mdx
@@ -45,6 +45,10 @@ from openai import OpenAI
 from scripts.utils import ManagedAPIServer
 
 
+# The gateway runs a full OpenHands agent, but OpenAI clients still need a
+# normal model-like name. We create an LLM profile below and expose it as
+# `openhands_<profile_name>` through `/v1/models`.
+
 api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
 assert api_key is not None, "Set LLM_API_KEY or OPENAI_API_KEY."
 
@@ -53,6 +57,10 @@ llm_base_url = os.getenv("LLM_BASE_URL")
 profile_name = "gateway_demo"
 gateway_model = f"openhands_{profile_name}"
 
+# Start a local agent-server for the demo. `use_session_api_key=True` turns on
+# authentication; the same key works as both `X-Session-API-Key` for native
+# agent-server routes and `Authorization: Bearer ...` for OpenAI SDK calls.
+
 with ManagedAPIServer(
     port=8770,
     use_session_api_key=True,
@@ -71,6 +79,9 @@ with ManagedAPIServer(
         or server.session_api_key
     )
     assert session_api_key is not None
+
+    # Use the native REST API once to create the profile that backs the gateway
+    # model. After that, normal OpenAI SDK calls are enough for chat traffic.
     api_client = httpx.Client(
         base_url=server.base_url,
         headers={"X-Session-API-Key": session_api_key},
@@ -86,6 +97,7 @@ with ManagedAPIServer(
     if llm_base_url:
         llm_config["base_url"] = llm_base_url
 
+    # `gateway_demo` becomes visible to OpenAI clients as `openhands_gateway_demo`.
     profile_response = api_client.post(
         f"/api/profiles/{profile_name}",
         json={"llm": llm_config, "include_secrets": True},
@@ -97,6 +109,10 @@ with ManagedAPIServer(
     assert gateway_model in model_ids
     print(f"Gateway models include: {gateway_model}")
 
+    # Ask through the OpenAI SDK. `with_raw_response` lets us read the custom
+    # response header that identifies the OpenHands conversation created behind
+    # this otherwise OpenAI-shaped request.
+
     first_response = openai_client.chat.completions.with_raw_response.create(
         model=gateway_model,
         messages=[
@@ -125,6 +141,10 @@ with ManagedAPIServer(
     persisted_response = api_client.get(f"/api/conversations/{conversation_id}")
     assert persisted_response.status_code == 200, persisted_response.text
 
+    # The gateway keeps conversations by default. Passing the header back lets
+    # another OpenAI-compatible request continue the same server-side agent
+    # conversation instead of starting over.
+
     second_completion = openai_client.chat.completions.create(
         model=gateway_model,
         messages=[
@@ -146,6 +166,8 @@ with ManagedAPIServer(
         metrics.get("accumulated_cost", 0.0) for metrics in usage_to_metrics.values()
     )
 
+    # Clean up the demo resources. Real applications can keep the conversation
+    # ID and inspect it later through the native agent-server API.
     api_client.delete(f"/api/conversations/{conversation_id}")
     api_client.delete(f"/api/profiles/{profile_name}")
     api_client.close()

From 17279ce1e961969cffdd6f94901a6cc9b0b8c3cc Mon Sep 17 00:00:00 2001
From: enyst <engel.nyst@gmail.com>
Date: Tue, 9 Jun 2026 10:48:40 +0000
Subject: [PATCH 3/4] docs: clarify OpenAI gateway streaming support

Co-authored-by: openhands <openhands@all-hands.dev>
---
 sdk/guides/agent-server/openai-gateway.mdx | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sdk/guides/agent-server/openai-gateway.mdx b/sdk/guides/agent-server/openai-gateway.mdx
index ea4850cd..f40a7a60 100644
--- a/sdk/guides/agent-server/openai-gateway.mdx
+++ b/sdk/guides/agent-server/openai-gateway.mdx
@@ -22,6 +22,10 @@ The gateway accepts the same session key in either OpenHands or OpenAI-compatibl
 - `X-Session-API-Key: <key>`
 - `Authorization: Bearer <key>`
 
+<Note>
+The gateway currently supports only non-streaming Chat Completions requests. Until streaming support is added, requests with `stream: true` return a `400` response.
+</Note>
+
 ## Ready-to-run example
 
 <Note>

From 2da159b88c9e5d1c03ad3c48063999045c7822c4 Mon Sep 17 00:00:00 2001
From: enyst <engel.nyst@gmail.com>
Date: Wed, 10 Jun 2026 17:09:28 +0000
Subject: [PATCH 4/4] docs: update OpenAI gateway example path

Co-authored-by: openhands <openhands@all-hands.dev>
---
 sdk/guides/agent-server/openai-gateway.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sdk/guides/agent-server/openai-gateway.mdx b/sdk/guides/agent-server/openai-gateway.mdx
index f40a7a60..439ae21e 100644
--- a/sdk/guides/agent-server/openai-gateway.mdx
+++ b/sdk/guides/agent-server/openai-gateway.mdx
@@ -29,10 +29,10 @@ The current gateway supports non-streaming Chat Completions requests. Requests w
 ## Ready-to-run example
 
 <Note>
-This example is available on GitHub: [examples/02_remote_agent_server/14_openai_compatible_gateway.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/14_openai_compatible_gateway.py)
+This example is available on GitHub: [examples/02_remote_agent_server/15_openai_compatible_gateway.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/15_openai_compatible_gateway.py)
 </Note>
 
-```python icon="python" expandable examples/02_remote_agent_server/14_openai_compatible_gateway.py
+```python icon="python" expandable examples/02_remote_agent_server/15_openai_compatible_gateway.py
 """Use the agent-server through an OpenAI-compatible Chat Completions client.
 
 This example starts a local agent-server, stores an LLM profile, lists it through
@@ -179,4 +179,4 @@ with ManagedAPIServer(
     print(f"EXAMPLE_COST: {accumulated_cost}")
 ```
 
-<RunExampleCode path_to_script="examples/02_remote_agent_server/14_openai_compatible_gateway.py"/>
+<RunExampleCode path_to_script="examples/02_remote_agent_server/15_openai_compatible_gateway.py"/>