diff --git a/.gitignore b/.gitignore
index d3cace3..552c3d3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -204,4 +204,5 @@ tmp/
 *.ignore
 
 # prevent cookies from being commited
-dsk
\ No newline at end of file
+dsk
+
diff --git a/README.md b/README.md
index ede6dbc..f81ac55 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 ![License](https://img.shields.io/badge/license-Apache2.0-lightgrey.svg)
 ![Status](https://img.shields.io/badge/status-experimental-orange.svg)
 
-A lightweight async Python client for interacting with DeepSeek chat infrastructure through a local bypass + cookie + proof-of-work pipeline.
+A lightweight async Python client for interacting with DeepSeek chat infrastructure through a local bypass + cookie + proof-of-work pipeline, with an **OpenAI-compatible API server**.
 
 This project follows the same conceptual direction (and also contains elements from) as:
 - [https://github.com/xtekky/deepseek4free](https://github.com/xtekky/deepseek4free)
@@ -31,6 +31,7 @@ dsk++ provides:
 * WASM-based proof-of-work solver with async thread pool
 * concurrent file upload support using asyncio.gather()
 * automatic Cloudflare detection and cookie refresh
+* **OpenAI-compatible API server** for use with AI agents (Cline, LangChain, OpenAI SDK, etc.)
 
 ---
 
@@ -83,26 +84,164 @@ This will:
 
 ---
 
+## OpenAI-compatible API
+
+dskpp can be used as an **OpenAI-compatible API server**, allowing integration with AI agents like **Cline**, **LangChain**, **OpenAI Python SDK**, and others.
+
+### start the server
+
+```bash
+python server.py
+```
+
+### start with authentication
+
+```bash
+export AUTH_KEY=your-secret-key
+python server.py
+```
+
+### environment variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `SERVER_PORT` | `8021` | Server port |
+| `DOCKERMODE` | `false` | Docker mode flag (headless Chrome) |
+| `AUTH_KEY` | *(empty)* | API key for Bearer auth. If not set, auth is disabled |
+| `MODEL_NAME` | `deepseek-chat` | Default model name |
+
+### API endpoints
+
+| Method | Path | Description |
+|--------|------|-------------|
+| `GET` | `/v1/models` | List available models |
+| `POST` | `/v1/chat/completions` | Chat completion (streaming + non-streaming) |
+| `GET` | `/health` | Health check |
+| `GET` | `/cookies` | Get Cloudflare bypass cookies (original bypass endpoint) |
+| `GET` | `/html` | Get page HTML with cookies (original bypass endpoint) |
+
+### authentication
+
+The server supports two authentication methods:
+
+1. **Authorization header** (recommended for AI agents):
+   ```
+   Authorization: Bearer your-secret-key
+   ```
+
+2. **Query parameter**:
+   ```
+   ?key=your-secret-key
+   ```
+
+If `AUTH_KEY` is not set, authentication is disabled and all requests are accepted.
+
+### available models
+
+| Model ID | Description |
+|----------|-------------|
+| `deepseek-chat` | Standard DeepSeek chat model |
+| `deepseek-reasoner` | DeepSeek with reasoning/thinking enabled |
+
+### use with Cline
+
+Configure Cline to use your dskpp server:
+
+```json
+{
+  "apiProvider": "openai",
+  "openAiBaseUrl": "http://localhost:8021/v1",
+  "openAiApiKey": "your-secret-key",
+  "openAiModelId": "deepseek-chat"
+}
+```
+
+### use with OpenAI Python SDK
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://localhost:8021/v1",
+    api_key="your-secret-key",
+)
+
+response = client.chat.completions.create(
+    model="deepseek-chat",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello!"},
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content:
+        print(chunk.choices[0].delta.content, end="")
+```
+
+### use with LangChain
+
+```python
+from langchain_openai import ChatOpenAI
+
+llm = ChatOpenAI(
+    base_url="http://localhost:8021/v1",
+    api_key="your-secret-key",
+    model="deepseek-chat",
+)
+
+response = llm.invoke("Hello!")
+print(response.content)
+```
+
+### example: curl
+
+```bash
+# List models
+curl http://localhost:8021/v1/models
+
+# Chat completion (non-streaming)
+curl -X POST http://localhost:8021/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-chat",
+    "messages": [{"role": "user", "content": "Hello!"}]
+  }'
+
+# Chat completion (streaming)
+curl -X POST http://localhost:8021/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-chat",
+    "messages": [{"role": "user", "content": "Hello!"}],
+    "stream": true
+  }'
+```
+
+---
+
 ## project structure
 
 ```
 dskpp/
 │
 ├── api.py                     # async DeepSeek API client
-├── server.py                 # FastAPI bypass + cookie server
-├── CloudflareBypasser.py     # Chromium-based challenge solver
-├── bypass.py                 # automation helper logic
-├── pow.py                   # WASM proof-of-work solver (async)
-├── run_and_get_cookies.py   # bootstrap cookie acquisition
+├── server.py                  # FastAPI server (bypass + OpenAI-compatible API)
+├── openai_compat.py           # OpenAI API models and helpers
+├── CloudflareBypasser.py      # Chromium-based challenge solver
+├── bypass.py                  # automation helper logic
+├── pow.py                     # WASM proof-of-work solver (async)
+├── run_and_get_cookies.py     # bootstrap cookie acquisition
 │
-├── dsk/                     # runtime cookie storage
-├── wasm/                    # WASM binaries for hashing
+├── dsk/                       # runtime cookie storage
+├── wasm/                      # WASM binaries for hashing
 └── README.md
 ```
 
 ---
 
-## usage
+## usage (Python API client)
 
 ### initialize client
 
@@ -138,20 +277,14 @@ print(result)  # "Successfully deleted session: session_id"
 ### file upload (concurrent multiple files)
 
 ```python
-# Upload multiple files concurrently
 file_ids = await api.upload_files([
     "document.pdf",
     "image.png",
     "data.csv"
 ])
-
-# file_ids returned in same order as input
 print(file_ids)  # ['file_id_1', 'file_id_2', 'file_id_3']
 ```
 
-> [!NOTE]
-> The system uploads all files simultaneously using `asyncio.gather()` for maximum efficiency.
-
 ---
 
 ### streaming chat with file references
@@ -160,17 +293,13 @@ print(file_ids)  # ['file_id_1', 'file_id_2', 'file_id_3']
 async for chunk in api.chat_completion(
     chat_session_id=session_id,
     prompt="Analyze these uploaded files",
-    ref_file_ids=file_ids,  # List of file IDs from upload
+    ref_file_ids=file_ids,
     thinking_enabled=True,
-    search_enabled=False  # Must be False when using files
+    search_enabled=False
 ):
-    # chunk is a dictionary with 'content' key
     print(chunk.get("content", ""), end="")
 ```
 
-> [!WARNING]
-> File uploads require `search_enabled=False`. Attempting to use both will raise `UploadFilesUnavailable`.
-
 ---
 
 ### chat with search
@@ -180,7 +309,7 @@ async for chunk in api.chat_completion(
     chat_session_id=session_id,
     prompt="What's the latest news about AI?",
     thinking_enabled=True,
-    search_enabled=True  # Enables web search for current info
+    search_enabled=True
 ):
     print(chunk.get("content", ""), end="")
 ```
@@ -193,24 +322,18 @@ The client parses DeepSeek's SSE format and returns dictionaries:
 
 ```python
 {
-    'type': 'text',           # Type of chunk (text, message_ids, etc.)
-    'content': 'incremental text...',  # The actual content
+    'type': 'text',
+    'content': 'incremental text...',
     'finish_reason': None     # 'stop' when complete, None otherwise
 }
 ```
 
-> [!NOTE]
-> The parser automatically handles both full format (with 'p' and 'o' fields) and simplified format (just 'v' field) chunks.
-
 ---
 
 ### non-streaming usage (aggregated)
 
-A fully buffered response can be constructed manually:
-
 ```python
 response = ""
-
 async for chunk in api.chat_completion(
     chat_session_id=session_id,
     prompt="Hello world"
@@ -220,21 +343,10 @@ async for chunk in api.chat_completion(
 
 ---
 
-### conversation history
-
-```python
-history = await api.get_history(session_id)
-print(history)
-```
-
----
-
 ### cleanup
 
-> [!IMPORTANT]
-> Always close the session when done to free resources:
-
 ```python
+await api.delete_chat_session(session_id)
 await api.close()
 ```
 
@@ -292,15 +404,24 @@ Async client for session-based chat interaction with:
 - automatic retry logic with Cloudflare detection and cookie refresh
 - async session management with curl_cffi
 
-### 2. bypass layer (`server.py`)
+### 2. server layer (`server.py`)
+
+FastAPI server providing:
+- OpenAI-compatible REST API (`/v1/models`, `/v1/chat/completions`)
+- WebSocket proxy to upstream DeepSeek API
+- Bearer token authentication
+- Cloudflare bypass via Chromium automation
+- cookie extraction and validation
 
-FastAPI + Chromium automation for:
+### 3. OpenAI compatibility layer (`openai_compat.py`)
 
-* Cloudflare bypass
-* cookie extraction
-* page validation
+Pydantic models and helpers for:
+- OpenAI request/response format conversion
+- Messages array to prompt string conversion
+- Model name mapping to DeepSeek parameters
+- SSE streaming chunk formatting
 
-### 3. PoW layer (`pow.py`)
+### 4. PoW layer (`pow.py`)
 
 WebAssembly-based solver with async wrapper using asyncio.to_thread() to keep event loop responsive during CPU-bound computations.
 
@@ -316,7 +437,6 @@ The system is designed around non-blocking execution:
 * browser automation runs in separate processes
 * cookie acquisition runs outside event loop control path
 * concurrent file uploads using asyncio.gather()
-* colored warning outputs for better visibility
 
 ---
 
@@ -337,44 +457,6 @@ from dskpp.api import (
 
 ---
 
-## example: full flow with file upload
-
-```python
-import asyncio
-from dskpp.api import DeepSeekAPI
-
-async def main():
-    api = DeepSeekAPI("your_token_here")
-
-    # Create session
-    session = await api.create_chat_session()
-    print(f"Session created: {session}")
-
-    # Upload files concurrently
-    file_ids = await api.upload_files([
-        "report.pdf",
-        "data.xlsx"
-    ])
-    print(f"Uploaded files: {file_ids}")
-
-    # Chat with file context
-    async for chunk in api.chat_completion(
-        session,
-        "Analyze these files and summarize key points",
-        ref_file_ids=file_ids,
-        search_enabled=False  # Required for file uploads
-    ):
-        print(chunk.get("content", ""), end="")
-
-    # Cleanup
-    await api.delete_chat_session(session)
-    await api.close()
-
-asyncio.run(main())
-```
-
----
-
 ## notes
 
 > [!CAUTION]
diff --git a/openai_compat.py b/openai_compat.py
new file mode 100644
index 0000000..719887c
--- /dev/null
+++ b/openai_compat.py
@@ -0,0 +1,108 @@
+"""
+OpenAI-compatible API models and helpers for dskpp.
+"""
+
+import uuid
+import os
+from typing import List, Optional, Dict, Any, Literal
+from pydantic import BaseModel, Field
+
+
+# Shared secret checked by the server's auth helpers, read once at import time.
+# An empty string (the default) means authentication is disabled.
+AUTH_KEY = os.environ.get("AUTH_KEY", "")
+
+# Model id applied when a chat request does not specify "model".
+DEFAULT_MODEL = os.environ.get("MODEL_NAME", "deepseek-chat")
+
+# Model ids advertised by the model listing.
+_MODEL_IDS = ("deepseek-chat", "deepseek-reasoner")
+
+# One OpenAI-style "model" object per id; "root" mirrors "id".
+AVAILABLE_MODELS = [
+    {
+        "id": model_id,
+        "object": "model",
+        "created": 1700000000,
+        "owned_by": "deepseek",
+        "permission": [],
+        "root": model_id,
+        "parent": None,
+    }
+    for model_id in _MODEL_IDS
+]
+
+
+class ChatMessage(BaseModel):  # one entry of the request's "messages" array
+    role: Literal["system", "user", "assistant", "tool", "function"]  # any other role fails validation
+    content: Optional[str] = Field(default=None)  # a string when present; may be omitted
+    name: Optional[str] = Field(default=None)  # optional; not read elsewhere in this module
+
+
+class ChatCompletionRequest(BaseModel):  # body schema for POST /v1/chat/completions
+    model: str = DEFAULT_MODEL  # falls back to the MODEL_NAME-derived default
+    messages: List[ChatMessage]  # required
+    stream: bool = False  # True selects the SSE streaming response
+    temperature: Optional[float] = None  # this and the fields below are accepted but not forwarded upstream
+    top_p: Optional[float] = None
+    n: Optional[int] = None
+    stop: Optional[Any] = None  # any JSON shape is allowed
+    max_tokens: Optional[int] = None
+    presence_penalty: Optional[float] = None
+    frequency_penalty: Optional[float] = None
+    user: Optional[str] = None
+
+
+def generate_id() -> str:  # completion id: "chatcmpl-" followed by 24 hex characters
+    return "chatcmpl-" + uuid.uuid4().hex[:24]
+
+
+def estimate_tokens(text: str) -> int:
+    """Approximate a token count as len(text) // 4, at least 1; 0 for empty text."""
+    char_count = len(text) if text else 0
+    approx = char_count // 4
+    return 0 if char_count == 0 else (approx or 1)
+
+
+def messages_to_prompt(messages: List[ChatMessage]) -> str:
+    """Flatten a chat history into one prompt string.
+
+    Every message is rendered as "[Label] content". The label is System, User
+    or Assistant for those roles and the raw role name for any other role; a
+    missing content is rendered as an empty string. Rendered messages are
+    joined with a blank line, and an empty list gives an empty string.
+    """
+    # Known roles get a capitalised label; anything else keeps its role name.
+    labels = {"system": "System", "user": "User", "assistant": "Assistant"}
+    rendered = [
+        f"[{labels.get(entry.role, entry.role)}] {entry.content or ''}"
+        for entry in messages
+    ]
+    return "\n\n".join(rendered)
+
+
+def model_to_config(model_name: str) -> Dict[str, Any]:
+    """Return DeepSeek chat flags for a requested model name.
+
+    Matching is case-insensitive: "deepseek-reasoner" and "deepseek-reasoning"
+    turn thinking on; every other name turns it off. Search is always off.
+    """
+    wants_thinking = model_name.lower() in {"deepseek-reasoner", "deepseek-reasoning"}
+    return {"thinking_enabled": wants_thinking, "search_enabled": False}
+
+
+def make_error_response(
+    message: str,
+    error_type: str = "invalid_request_error",
+    param: Optional[str] = None,
+    code: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Build an error payload shaped like {"error": {...}}.
+
+    The inner dict carries "message", "type" (from error_type), "param" and
+    "code"; param and code stay None when the caller does not supply them.
+    """
+    details: Dict[str, Any] = {"message": message, "type": error_type}
+    details["param"] = param
+    details["code"] = code
+    return {"error": details}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 0924203..be38191 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,3 +8,4 @@ pydantic==2.13.4
 PyVirtualDisplay==3.0
 uvicorn==0.46.0
 wasmtime==42.0.0
+websockets>=14.0
diff --git a/server.py b/server.py
index c7b36bd..3ca24cf 100644
--- a/server.py
+++ b/server.py
@@ -1,28 +1,40 @@
 import json
 import re
 import os
-import asyncio
+import uuid
+import time
 from urllib.parse import urlparse
 
 from CloudflareBypasser import CloudflareBypasser
 from DrissionPage import ChromiumPage, ChromiumOptions
-from fastapi import FastAPI, HTTPException, Response
+from fastapi import FastAPI, HTTPException, Response, Request, Header
+from fastapi.responses import StreamingResponse
 from pydantic import BaseModel
-from typing import Dict
+from typing import Dict, Optional
 
 import argparse
 from pyvirtualdisplay import Display
 import uvicorn
 import atexit
-import time
 from asyncio import to_thread
 
+from openai_compat import (
+    AUTH_KEY,
+    AVAILABLE_MODELS,
+    ChatCompletionRequest,
+    messages_to_prompt,
+    model_to_config,
+    generate_id,
+    estimate_tokens,
+    make_error_response,
+)
+
 
 DOCKER_MODE = os.getenv("DOCKERMODE", "false").lower() == "true"
 SERVER_PORT = int(os.getenv("SERVER_PORT", 8021))
 
 browser_path = "/usr/bin/google-chrome"
-app = FastAPI()
+app = FastAPI(title="dskpp OpenAI Compatible API")
 
 
 class CookieResponse(BaseModel):
@@ -30,6 +42,41 @@ class CookieResponse(BaseModel):
     user_agent: str
 
 
+# Auth
+
+def verify_api_key(authorization: Optional[str], key_param: Optional[str]) -> bool:
+    """Return True when the request carries the configured AUTH_KEY.
+
+    An empty AUTH_KEY disables auth. The Authorization header ("Bearer <key>"
+    or the bare key) wins over the ``key`` query value when both are present.
+    Keys are compared with hmac.compare_digest to avoid timing side channels.
+    """
+    import hmac  # function-scope import keeps this block self-contained
+    if not AUTH_KEY:
+        return True
+    supplied = authorization or key_param
+    if not supplied:
+        return False
+    if authorization and authorization.startswith("Bearer "):
+        supplied = authorization[7:]
+    return hmac.compare_digest(supplied.encode("utf-8"), AUTH_KEY.encode("utf-8"))
+
+
+def check_auth(authorization: Optional[str], key_param: Optional[str]):
+    """Reject the request with HTTP 401 unless verify_api_key accepts it.
+
+    The exception detail is an error dict with code "invalid_api_key".
+    """
+    if verify_api_key(authorization, key_param):
+        return
+    failure = make_error_response(
+        "Invalid API key", error_type="invalid_request_error", code="invalid_api_key"
+    )
+    raise HTTPException(status_code=401, detail=failure)
+
+
+# URL safety check (original)
+
 def is_safe_url(url: str) -> bool:
     parsed_url = urlparse(url)
     ip_pattern = re.compile(
@@ -48,7 +95,7 @@ def verify_page_loaded(driver: ChromiumPage) -> bool:
     try:
         body = driver.ele('tag:body', timeout=10)
         return len(body.html) > 100
-    except:
+    except Exception:  # any failure while probing the page means "not loaded"
         return False
 
 
@@ -97,9 +144,7 @@ def bypass_cloudflare(url: str, retries: int, log: bool, proxy: str = None) -> C
             raise e
 
 
-# -------------------------
-# Async wrappers for FastAPI
-# -------------------------
+# Original endpoints
 
 @app.get("/cookies", response_model=CookieResponse)
 async def get_cookies(url: str, retries: int = 5, proxy: str = None):
@@ -153,13 +198,242 @@ async def get_html(url: str, retries: int = 5, proxy: str = None):
         raise HTTPException(status_code=500, detail=str(e))
 
 
-# -------------------------
-# Main entry  (async-safe sleep)
-# -------------------------
+# OpenAI-compatible endpoints
+
+@app.get("/health")
+async def health_check():  # liveness probe; performs no API-key check
+    return dict(status="ok")
+
+
+@app.get("/v1/models")
+async def list_models(
+    authorization: Optional[str] = Header(None),
+    key: Optional[str] = None,
+):
+    """List the advertised models after the API-key check (401 on failure)."""
+    check_auth(authorization, key)
+    listing = {"object": "list"}
+    listing["data"] = AVAILABLE_MODELS
+    return listing
+
+
+@app.post("/v1/chat/completions")
+async def chat_completions(
+    request: Request,
+    authorization: Optional[str] = Header(None),
+    key: Optional[str] = None,
+):
+    """Handle an OpenAI-style chat completion request.
+
+    The JSON body is validated as ChatCompletionRequest, its messages are
+    flattened into one prompt, and the prompt is sent upstream via _proxy_chat.
+    With ``stream`` true the reply is a text/event-stream of chunk objects;
+    otherwise the text chunks are joined into a single chat.completion dict
+    whose usage figures are rough estimates from estimate_tokens.
+
+    Raises:
+        HTTPException: 401 for a bad API key, 400 for an invalid body or empty
+            messages, 502 when the upstream reports an error (non-streaming).
+    """
+    check_auth(authorization, key)
+
+    try:
+        body = await request.json()
+    except Exception:
+        raise HTTPException(status_code=400, detail=make_error_response("Invalid JSON body"))
+
+    try:
+        req = ChatCompletionRequest(**body)
+    except Exception as e:
+        raise HTTPException(
+            status_code=400,
+            detail=make_error_response(f"Invalid request: {str(e)}"),
+        )
+
+    if not req.messages:
+        raise HTTPException(status_code=400, detail=make_error_response("messages is required"))
+
+    prompt = messages_to_prompt(req.messages)
+    model_config = model_to_config(req.model)
+    completion_id = generate_id()
+    created = int(time.time())
+
+    if req.stream:
+        return StreamingResponse(
+            stream_chat_completion(
+                prompt=prompt,
+                completion_id=completion_id,
+                created=created,
+                model_name=req.model,
+                model_config=model_config,
+            ),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
+            },
+        )
+
+    pieces = []
+    async for chunk in _proxy_chat(prompt, model_config):
+        if chunk.get("type") == "error":
+            # Surface upstream failures as 502 instead of HTTP 200 with empty content.
+            raise HTTPException(
+                status_code=502,
+                detail=make_error_response(chunk.get("content", ""), error_type="api_error"),
+            )
+        if chunk.get("type") == "text":
+            pieces.append(chunk.get("content", ""))
+    full_content = "".join(pieces)
+    message = {"role": "assistant", "content": full_content}
+    prompt_tokens = estimate_tokens(prompt)
+    completion_tokens = estimate_tokens(full_content)
+    return {
+        "id": completion_id,
+        "object": "chat.completion",
+        "created": created,
+        "model": req.model,
+        "choices": [{"index": 0, "message": message, "finish_reason": "stop"}],
+        "usage": {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": prompt_tokens + completion_tokens,
+        },
+    }
+
+
+# Streaming internals
+
+def _load_cookies() -> Dict[str, str]:
+    """Return the saved "cookies" dict, or {} if the file is missing, unreadable or malformed."""
+    cookies_path = os.path.join(os.path.dirname(__file__), "dsk", "dsk", "cookies.json")
+    try:
+        with open(cookies_path, "r", encoding="utf-8") as f:
+            cookie_data = json.load(f)
+    except (OSError, ValueError):  # ValueError covers JSONDecodeError and UnicodeDecodeError
+        return {}
+    return cookie_data.get("cookies", {}) if isinstance(cookie_data, dict) else {}
+
+
+async def _proxy_chat(prompt: str, model_config: dict):
+    """Stream one chat exchange from the upstream DeepSeek WebSocket endpoint.
+
+    Cookies from _load_cookies() go into the handshake headers. After
+    connecting, a single "chat" request with the prompt and the
+    thinking/search flags is sent, and each usable reply is yielded as a dict
+    with "type" ("text" or "error"), "content" and "finish_reason".
+
+    The final dict always has finish_reason "stop". Exceptions raised while
+    connecting or exchanging messages are reported as an "error" dict.
+    """
+    import websockets
+
+    # NOTE(review): endpoint URL and message schema are kept as given; confirm upstream.
+    endpoint = f"wss://chat.deepseek.com/api/v0/chat/ws?uid={uuid.uuid4()}"
+    saved = _load_cookies()
+    handshake_headers = {
+        "Cookie": "; ".join(f"{name}={value}" for name, value in saved.items()),
+        "Origin": "https://chat.deepseek.com",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
+    }
+
+    try:
+        async with websockets.connect(endpoint, additional_headers=handshake_headers) as ws:
+            flags = {
+                option: model_config.get(option, False)
+                for option in ("thinking_enabled", "search_enabled")
+            }
+            request_frame = {"type": "chat", "payload": {"prompt": prompt, **flags}}
+            await ws.send(json.dumps(request_frame))
+
+            async for frame in ws:
+                try:
+                    text = frame if isinstance(frame, str) else frame.decode("utf-8")
+                    event = json.loads(text)
+                except (json.JSONDecodeError, UnicodeDecodeError):
+                    continue  # skip frames that are not UTF-8 JSON
+
+                kind = event.get("type", "")
+                if kind == "error":
+                    detail = (event.get("payload") or {}).get("message", "Upstream error")
+                    yield {"type": "error", "content": detail, "finish_reason": "stop"}
+                    return
+                if kind != "chat_response":
+                    continue  # other message types are ignored
+
+                payload = event.get("payload") or {}
+                piece = payload.get("content") or ""
+                if piece:
+                    yield {"type": "text", "content": piece, "finish_reason": None}
+                if payload.get("is_end", False):
+                    yield {"type": "text", "content": "", "finish_reason": "stop"}
+                    return
+
+            # The socket closed without an end marker; still signal completion.
+            yield {"type": "text", "content": "", "finish_reason": "stop"}
+
+    except Exception as e:
+        yield {"type": "error", "content": f"Connection error: {str(e)}", "finish_reason": "stop"}
+
+
+async def stream_chat_completion(
+    prompt: str,
+    completion_id: str,
+    created: int,
+    model_name: str,
+    model_config: dict,
+):
+    """Yield an OpenAI-style SSE stream for one upstream chat exchange.
+
+    Events are emitted in this order: a chunk announcing the assistant role,
+    one chunk per non-empty upstream "content" value, a chunk whose
+    finish_reason is "stop", and the literal "data: [DONE]" terminator. Each
+    event is a "data: <json>" line followed by a blank line.
+
+    Upstream "error" chunks also carry "content", so their message reaches the
+    client as ordinary assistant text.
+
+    Args:
+        prompt: Flattened prompt text handed to _proxy_chat.
+        completion_id: Value placed in every chunk's "id" field.
+        created: Value placed in every chunk's "created" field.
+        model_name: Value placed in every chunk's "model" field.
+        model_config: Flags handed to _proxy_chat unchanged.
+
+    Yields:
+        str: One SSE event per iteration.
+    """
+
+    def sse_event(delta: dict, finish_reason: Optional[str] = None) -> str:
+        # All events share one envelope; only delta and finish_reason vary.
+        envelope = {
+            "id": completion_id,
+            "object": "chat.completion.chunk",
+            "created": created,
+            "model": model_name,
+            "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
+        }
+        return f"data: {json.dumps(envelope)}\n\n"
+
+    # The opening chunk carries only the role, with no content.
+    yield sse_event({"role": "assistant"})
+
+    async for upstream in _proxy_chat(prompt, model_config):
+        text = upstream.get("content", "")
+        if text:
+            yield sse_event({"content": text})
+        if upstream.get("finish_reason") == "stop":
+            break
+
+    # Reached after an explicit "stop" and also when the upstream simply ends.
+    yield sse_event({}, "stop")
+    yield "data: [DONE]\n\n"
+
 
 if __name__ == "__main__":
 
-    parser = argparse.ArgumentParser(description="Cloudflare bypass api")
+    parser = argparse.ArgumentParser(description="dskpp - OpenAI compatible API server")
     parser.add_argument("--nolog", action="store_true")
     parser.add_argument("--headless", action="store_true")
     args = parser.parse_args()
@@ -176,6 +450,16 @@ def cleanup():
 
         atexit.register(cleanup)
 
+    # Keep `log` defined: --nolog is still parsed above and bypass_cloudflare takes a log flag.
     log = not args.nolog
+
+    print(f"\033[92m[dskpp] Starting OpenAI-compatible API server on port {SERVER_PORT}\033[0m")
+    print("\033[92m[dskpp] Endpoints:\033[0m")
+    for route in ("GET  /v1/models", "POST /v1/chat/completions", "GET  /health", "GET  /cookies", "GET  /html"):
+        print(f"\033[92m  {route}\033[0m")
+    if AUTH_KEY:
+        print("\033[92m  Auth: enabled (AUTH_KEY set)\033[0m")
+    else:
+        print("\033[93m  Auth: disabled (AUTH_KEY not set)\033[0m")
 
     uvicorn.run(app, host="0.0.0.0", port=SERVER_PORT)
\ No newline at end of file