Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Workflow that runs the Python integration test suite under tests/integration.
name: Integration Tests

# Triggered for merge-queue check requests and by manual dispatch.
on:
merge_group:
types: [checks_requested]
workflow_dispatch:

# One active run per workflow/ref pair; a newer run cancels the in-progress one.
concurrency:
group: python-integration-tests-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

# Grant the workflow token read-only access to repository contents.
permissions:
contents: read

jobs:
integration-tests:
runs-on: depot-ubuntu-24.04-16

steps:
- name: Check out repository
uses: actions/checkout@v6

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: "3.12"

# Poetry is installed with pip, which is upgraded in the same command.
- name: Install Poetry
run: python -m pip install --upgrade pip poetry

# Print the interpreter and Poetry versions to the job log.
- name: Show tool versions
run: |
python --version
poetry --version

# Install project dependencies including the "dev" group, without prompts.
- name: Install dependencies
run: poetry install --with dev --no-interaction

- name: Run integration tests
run: poetry run pytest tests/integration
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ Both the sync and async client follow similar configuration params
### API Key
The API key can be configured either through the constructor arguments or through the `HYPERBROWSER_API_KEY` environment variable.

If no API key is provided, the client falls back to a saved OAuth session created by `hx auth login`. By default it reads `~/.hx_config/auth/default.json`, or `~/.hx_config/auth/<profile>.json` when `HYPERBROWSER_PROFILE` or `ClientConfig(profile=...)` is set.

Profile names must match `^[A-Za-z0-9._-]+$`.

`base_url` and `HYPERBROWSER_BASE_URL` accept either `https://host` or `https://host/api`. The client normalizes both to the same control-plane base URL.

## Usage

### Async
Expand Down
151 changes: 151 additions & 0 deletions tests/integration/test_client_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import json
import threading
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

import pytest

from hyperbrowser import AsyncHyperbrowser, Hyperbrowser
from hyperbrowser.models.scrape import ScrapeOptions, StartScrapeJobParams


def _read_json_body(handler: BaseHTTPRequestHandler):
    """Decode the JSON request body announced by the Content-Length header.

    Returns ``None`` when the header is missing or not positive. Otherwise
    reads exactly that many bytes from ``handler.rfile`` and parses them as
    UTF-8 encoded JSON.
    """
    declared_size = int(handler.headers.get("Content-Length", "0"))
    if declared_size > 0:
        raw_bytes = handler.rfile.read(declared_size)
        return json.loads(raw_bytes.decode("utf-8"))
    return None


def _send_json(handler: BaseHTTPRequestHandler, status_code: int, payload: dict) -> None:
    """Write ``payload`` to the client as a JSON response with ``status_code``.

    The payload is serialized once so that the Content-Length header matches
    the exact number of bytes written to ``handler.wfile``.
    """
    body = json.dumps(payload).encode("utf-8")
    header_fields = (
        ("Content-Type", "application/json"),
        ("Content-Length", str(len(body))),
    )

    handler.send_response(status_code)
    for field_name, field_value in header_fields:
        handler.send_header(field_name, field_value)
    handler.end_headers()
    handler.wfile.write(body)


def _start_server():
    """Launch a local HTTP server that answers the scrape routes used in tests.

    Returns a ``(server, base_url, requests)`` tuple: the running
    ``ThreadingHTTPServer``, its ``http://127.0.0.1:<port>`` address, and a
    list to which every handled GET/POST request is appended as a dict.
    The server runs on a daemon thread; the caller shuts it down.
    """
    captured = []

    class Handler(BaseHTTPRequestHandler):
        def _capture(self, body):
            # Record what the client actually sent so tests can assert on it.
            captured.append(
                {
                    "method": self.command,
                    "path": self.path,
                    "api_key": self.headers.get("x-api-key"),
                    "content_type": self.headers.get("content-type"),
                    "body": body,
                }
            )

        def _respond(self, expected_path, payload):
            # Any path other than the expected one gets a 404 JSON reply.
            if self.path != expected_path:
                _send_json(self, 404, {"message": f"unexpected route {self.path}"})
            else:
                _send_json(self, 200, payload)

        def do_POST(self):
            self._capture(_read_json_body(self))
            self._respond("/api/scrape", {"jobId": "job_123"})

        def do_GET(self):
            self._capture(None)
            self._respond("/api/scrape/job_123/status", {"status": "completed"})

        def log_message(self, format, *args):
            # Overridden to do nothing so no per-request log lines are emitted.
            return

    # Port 0 lets the operating system pick a free port.
    httpd = ThreadingHTTPServer(("127.0.0.1", 0), Handler)
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    port = httpd.server_address[1]
    return httpd, f"http://127.0.0.1:{port}", captured


def _scrape_params() -> StartScrapeJobParams:
    """Build the scrape-job parameters used by the client tests."""
    markdown_only = ScrapeOptions(formats=["markdown"])
    return StartScrapeJobParams(
        url="https://example.com",
        scrape_options=markdown_only,
    )


def test_sync_client_uses_configured_api_endpoint_and_parses_responses():
    """Check the sync client's requests and response parsing against a local server.

    Starts a scrape job and fetches its status, then asserts on both the
    parsed results and the exact requests the server recorded.
    """
    server, base_url, requests = _start_server()
    try:
        # Build the client inside the outer try so the server is still stopped
        # if construction fails.
        client = Hyperbrowser(api_key="test-api-key", base_url=base_url)
        try:
            started = client.scrape.start(_scrape_params())
            status = client.scrape.get_status(started.job_id)
        finally:
            client.close()
    finally:
        # Runs even when client.close() raises, so the listening socket and
        # the serve_forever thread never outlive the test.
        server.shutdown()
        server.server_close()

    assert started.job_id == "job_123"
    assert status.status == "completed"
    assert requests == [
        {
            "method": "POST",
            "path": "/api/scrape",
            "api_key": "test-api-key",
            "content_type": "application/json",
            "body": {
                "url": "https://example.com",
                "scrapeOptions": {
                    "formats": ["markdown"],
                },
            },
        },
        {
            "method": "GET",
            "path": "/api/scrape/job_123/status",
            "api_key": "test-api-key",
            "content_type": None,
            "body": None,
        },
    ]


@pytest.mark.anyio
async def test_async_client_uses_configured_api_endpoint_and_parses_responses():
    """Check the async client's requests and response parsing against a local server.

    Starts a scrape job and fetches its status, then asserts on both the
    parsed results and the exact requests the server recorded.
    """
    server, base_url, requests = _start_server()
    try:
        # Build the client inside the outer try so the server is still stopped
        # if construction fails.
        client = AsyncHyperbrowser(api_key="test-api-key", base_url=base_url)
        try:
            started = await client.scrape.start(_scrape_params())
            status = await client.scrape.get_status(started.job_id)
        finally:
            await client.close()
    finally:
        # Runs even when client.close() raises, so the listening socket and
        # the serve_forever thread never outlive the test.
        server.shutdown()
        server.server_close()

    assert started.job_id == "job_123"
    assert status.status == "completed"
    assert requests == [
        {
            "method": "POST",
            "path": "/api/scrape",
            "api_key": "test-api-key",
            "content_type": "application/json",
            "body": {
                "url": "https://example.com",
                "scrapeOptions": {
                    "formats": ["markdown"],
                },
            },
        },
        {
            "method": "GET",
            "path": "/api/scrape/job_123/status",
            "api_key": "test-api-key",
            "content_type": None,
            "body": None,
        },
    ]