Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@
"sdk/guides/agent-server/api-sandbox",
"sdk/guides/agent-server/cloud-workspace",
"sdk/guides/agent-server/custom-tools",
"sdk/guides/agent-server/openai-gateway",
{
"group": "API Reference",
"openapi": {
Expand Down
182 changes: 182 additions & 0 deletions sdk/guides/agent-server/openai-gateway.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
---
title: OpenAI-Compatible Gateway
description: Call an OpenHands agent-server through the OpenAI Chat Completions protocol.
---

import RunExampleCode from "/sdk/shared-snippets/how-to-run-example.mdx";

The agent-server exposes an OpenAI-compatible `/v1/chat/completions` endpoint so clients that already speak the OpenAI protocol can call an OpenHands agent.

Use this when you want an existing chat UI, IDE integration, evaluation harness, or another agent to treat OpenHands as an OpenAI-style backend while still getting the full agent runtime behind the request.

## How it works

1. Save an LLM profile through the agent-server profile API.
2. List available gateway models with `GET /v1/models`.
3. Call `POST /v1/chat/completions` with a model ID shaped like `openhands_<profile_name>`.
4. Read the `X-OpenHands-ServerConversation-ID` header from the response.
5. Pass that header back on later requests to continue the same OpenHands conversation.

The gateway accepts the same session key in either OpenHands or OpenAI-compatible form:

- `X-Session-API-Key: <key>`
- `Authorization: Bearer <key>`

<Note>
The current gateway supports only non-streaming Chat Completions requests. Requests with `stream: true` return a `400` response until streaming support is added.
</Note>

## Ready-to-run example

<Note>
This example is available on GitHub: [examples/02_remote_agent_server/14_openai_compatible_gateway.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/14_openai_compatible_gateway.py)
</Note>

```python icon="python" expandable examples/02_remote_agent_server/14_openai_compatible_gateway.py
"""Use the agent-server through an OpenAI-compatible Chat Completions client.

This example starts a local agent-server, stores an LLM profile, lists it through
``GET /v1/models``, then calls ``POST /v1/chat/completions`` with the OpenAI
Python SDK. The returned ``X-OpenHands-ServerConversation-ID`` header is passed
back on a second call to continue the same OpenHands conversation.
"""

import os
from uuid import UUID

import httpx
from openai import OpenAI
from scripts.utils import ManagedAPIServer


# The gateway runs a full OpenHands agent, but OpenAI clients still need a
# normal model-like name. We create an LLM profile below and expose it as
# `openhands_<profile_name>` through `/v1/models`.

api_key = os.getenv("LLM_API_KEY") or os.getenv("OPENAI_API_KEY")
assert api_key is not None, "Set LLM_API_KEY or OPENAI_API_KEY."

llm_model = os.getenv("LLM_MODEL", "gpt-5-nano")
llm_base_url = os.getenv("LLM_BASE_URL")
profile_name = "gateway_demo"
gateway_model = f"openhands_{profile_name}"

# Start a local agent-server for the demo. `use_session_api_key=True` turns on
# authentication; the same key works as both `X-Session-API-Key` for native
# agent-server routes and `Authorization: Bearer ...` for OpenAI SDK calls.

with ManagedAPIServer(
    port=8770,
    use_session_api_key=True,
    extra_env={
        "OH_ENABLE_VNC": "0",
        "OH_ENABLE_VSCODE": "0",
        "OH_PRELOAD_TOOLS": "0",
        "OH_SECRET_KEY": "example-secret-key-for-demo-only-32b",
        "OH_WEBHOOKS": "[]",
    },
    health_request_timeout=2.0,
) as server:
    # An explicitly exported key takes precedence over the one the managed
    # server reports.
    session_api_key = (
        os.getenv("SESSION_API_KEY")
        or os.getenv("OH_SESSION_API_KEYS_0")
        or server.session_api_key
    )
    assert session_api_key is not None

    # Use the native REST API once to create the profile that backs the gateway
    # model. After that, normal OpenAI SDK calls are enough for chat traffic.
    # The `with` block closes the HTTP client even when a later step fails.
    with httpx.Client(
        base_url=server.base_url,
        headers={"X-Session-API-Key": session_api_key},
        timeout=120.0,
    ) as api_client:
        openai_client = OpenAI(
            api_key=session_api_key,
            base_url=f"{server.base_url}/v1",
            timeout=120.0,
        )

        llm_config = {"model": llm_model, "api_key": api_key}
        if llm_base_url:
            llm_config["base_url"] = llm_base_url

        # Track what this run actually created so the `finally` block below
        # only removes resources that belong to the demo.
        profile_created = False
        conversation_id = None

        try:
            # `gateway_demo` becomes visible to OpenAI clients as
            # `openhands_gateway_demo`.
            profile_response = api_client.post(
                f"/api/profiles/{profile_name}",
                json={"llm": llm_config, "include_secrets": True},
            )
            assert profile_response.status_code == 201, profile_response.text
            profile_created = True

            models = openai_client.models.list()
            model_ids = [model.id for model in models.data]
            assert gateway_model in model_ids
            print(f"Gateway models include: {gateway_model}")

            # Ask through the OpenAI SDK. `with_raw_response` lets us read the
            # custom response header that identifies the OpenHands conversation
            # created behind this otherwise OpenAI-shaped request.
            first_response = openai_client.chat.completions.with_raw_response.create(
                model=gateway_model,
                messages=[
                    {
                        "role": "system",
                        "content": "Answer directly and do not use tools.",
                    },
                    {
                        "role": "user",
                        "content": (
                            "In one sentence, explain what an OpenAI-compatible "
                            "agent-server gateway does."
                        ),
                    },
                ],
            )
            first_completion = first_response.parse()
            returned_id = first_response.headers.get(
                "X-OpenHands-ServerConversation-ID"
            )
            assert returned_id is not None
            # UUID() raises ValueError on a malformed ID; only a validated ID
            # is recorded for reuse and cleanup.
            UUID(returned_id)
            conversation_id = returned_id

            first_answer = first_completion.choices[0].message.content
            print(f"First answer: {first_answer}")
            print(f"OpenHands conversation ID: {conversation_id}")

            persisted_response = api_client.get(
                f"/api/conversations/{conversation_id}"
            )
            assert persisted_response.status_code == 200, persisted_response.text

            # The gateway keeps conversations by default. Passing the header
            # back lets another OpenAI-compatible request continue the same
            # server-side agent conversation instead of starting over.
            second_completion = openai_client.chat.completions.create(
                model=gateway_model,
                messages=[
                    {
                        "role": "user",
                        "content": (
                            "Now answer in five words or fewer: what did I ask about?"
                        ),
                    }
                ],
                extra_headers={"X-OpenHands-ServerConversation-ID": conversation_id},
            )
            second_answer = second_completion.choices[0].message.content
            print(f"Second answer using same conversation: {second_answer}")

            conversation_response = api_client.get(
                f"/api/conversations/{conversation_id}"
            )
            assert conversation_response.status_code == 200, (
                conversation_response.text
            )
            # `stats` / `usage_to_metrics` may be missing or null; fall back to
            # empty mappings so the cost sums to 0.
            stats = conversation_response.json().get("stats") or {}
            usage_to_metrics = stats.get("usage_to_metrics") or {}
            accumulated_cost = sum(
                metrics.get("accumulated_cost", 0.0)
                for metrics in usage_to_metrics.values()
            )
        finally:
            # Clean up the demo resources, even if a step above failed. Real
            # applications can keep the conversation ID and inspect it later
            # through the native agent-server API.
            if conversation_id is not None:
                api_client.delete(f"/api/conversations/{conversation_id}")
            if profile_created:
                api_client.delete(f"/api/profiles/{profile_name}")

print(f"EXAMPLE_COST: {accumulated_cost}")
```

<RunExampleCode path_to_script="examples/02_remote_agent_server/14_openai_compatible_gateway.py"/>
Loading