From 79ca20fd79ef9ab21788b1dc17bfd4627c827007 Mon Sep 17 00:00:00 2001 From: Ninad Sinha Date: Mon, 11 May 2026 16:54:48 -0700 Subject: [PATCH 1/2] Add integration tests --- .github/workflows/integration-tests.yml | 40 +++++++ tests/integration/test_client_http.py | 151 ++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 .github/workflows/integration-tests.yml create mode 100644 tests/integration/test_client_http.py diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 00000000..914890cd --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,40 @@ +name: Integration Tests + +on: + pull_request: + push: + branches: [main] + +concurrency: + group: python-integration-tests-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + integration-tests: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install Poetry + run: python -m pip install --upgrade pip poetry + + - name: Show tool versions + run: | + python --version + poetry --version + + - name: Install dependencies + run: poetry install --with dev --no-interaction + + - name: Run integration tests + run: poetry run pytest tests/integration diff --git a/tests/integration/test_client_http.py b/tests/integration/test_client_http.py new file mode 100644 index 00000000..48643ead --- /dev/null +++ b/tests/integration/test_client_http.py @@ -0,0 +1,151 @@ +import json +import threading +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer + +import pytest + +from hyperbrowser import AsyncHyperbrowser, Hyperbrowser +from hyperbrowser.models.scrape import ScrapeOptions, StartScrapeJobParams + + +def _read_json_body(handler: BaseHTTPRequestHandler): + content_length = 
int(handler.headers.get("Content-Length", "0")) + if content_length <= 0: + return None + return json.loads(handler.rfile.read(content_length).decode("utf-8")) + + +def _send_json(handler: BaseHTTPRequestHandler, status_code: int, payload: dict) -> None: + encoded = json.dumps(payload).encode("utf-8") + handler.send_response(status_code) + handler.send_header("Content-Type", "application/json") + handler.send_header("Content-Length", str(len(encoded))) + handler.end_headers() + handler.wfile.write(encoded) + + +def _start_server(): + requests = [] + + class Handler(BaseHTTPRequestHandler): + def do_POST(self): + requests.append( + { + "method": self.command, + "path": self.path, + "api_key": self.headers.get("x-api-key"), + "content_type": self.headers.get("content-type"), + "body": _read_json_body(self), + } + ) + + if self.path == "/api/scrape": + _send_json(self, 200, {"jobId": "job_123"}) + return + + _send_json(self, 404, {"message": f"unexpected route {self.path}"}) + + def do_GET(self): + requests.append( + { + "method": self.command, + "path": self.path, + "api_key": self.headers.get("x-api-key"), + "content_type": self.headers.get("content-type"), + "body": None, + } + ) + + if self.path == "/api/scrape/job_123/status": + _send_json(self, 200, {"status": "completed"}) + return + + _send_json(self, 404, {"message": f"unexpected route {self.path}"}) + + def log_message(self, format, *args): + return + + server = ThreadingHTTPServer(("127.0.0.1", 0), Handler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + return server, f"http://127.0.0.1:{server.server_address[1]}", requests + + +def _scrape_params() -> StartScrapeJobParams: + return StartScrapeJobParams( + url="https://example.com", + scrape_options=ScrapeOptions(formats=["markdown"]), + ) + + +def test_sync_client_uses_configured_api_endpoint_and_parses_responses(): + server, base_url, requests = _start_server() + client = Hyperbrowser(api_key="test-api-key", 
base_url=base_url) + try: + started = client.scrape.start(_scrape_params()) + status = client.scrape.get_status(started.job_id) + finally: + client.close() + server.shutdown() + server.server_close() + + assert started.job_id == "job_123" + assert status.status == "completed" + assert requests == [ + { + "method": "POST", + "path": "/api/scrape", + "api_key": "test-api-key", + "content_type": "application/json", + "body": { + "url": "https://example.com", + "scrapeOptions": { + "formats": ["markdown"], + }, + }, + }, + { + "method": "GET", + "path": "/api/scrape/job_123/status", + "api_key": "test-api-key", + "content_type": None, + "body": None, + }, + ] + + +@pytest.mark.anyio +async def test_async_client_uses_configured_api_endpoint_and_parses_responses(): + server, base_url, requests = _start_server() + client = AsyncHyperbrowser(api_key="test-api-key", base_url=base_url) + try: + started = await client.scrape.start(_scrape_params()) + status = await client.scrape.get_status(started.job_id) + finally: + await client.close() + server.shutdown() + server.server_close() + + assert started.job_id == "job_123" + assert status.status == "completed" + assert requests == [ + { + "method": "POST", + "path": "/api/scrape", + "api_key": "test-api-key", + "content_type": "application/json", + "body": { + "url": "https://example.com", + "scrapeOptions": { + "formats": ["markdown"], + }, + }, + }, + { + "method": "GET", + "path": "/api/scrape/job_123/status", + "api_key": "test-api-key", + "content_type": None, + "body": None, + }, + ] From 3124d86b20f1d11bced271ce04d736a9ca69ef7f Mon Sep 17 00:00:00 2001 From: Ninad Sinha Date: Mon, 11 May 2026 17:28:22 -0700 Subject: [PATCH 2/2] Update workflow to use merge_queues --- .github/workflows/integration-tests.yml | 8 ++++---- README.md | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 914890cd..7a656d6e 100644 --- 
a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -1,9 +1,9 @@ name: Integration Tests on: - pull_request: - push: - branches: [main] + merge_group: + types: [checks_requested] + workflow_dispatch: concurrency: group: python-integration-tests-${{ github.workflow }}-${{ github.ref }} @@ -14,7 +14,7 @@ permissions: jobs: integration-tests: - runs-on: ubuntu-latest + runs-on: depot-ubuntu-24.04-16 steps: - name: Check out repository diff --git a/README.md b/README.md index 9f62088a..3e9dd8dc 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,12 @@ Both the sync and async client follow similar configuration params ### API Key The API key can be configured either from the constructor arguments or environment variables using `HYPERBROWSER_API_KEY` +If no API key is provided, the client falls back to a saved OAuth session created by `hx auth login`. By default it reads `~/.hx_config/auth/default.json`, or `~/.hx_config/auth/<profile>.json` when `HYPERBROWSER_PROFILE` or `ClientConfig(profile=...)` is set. + +Profile names must match `^[A-Za-z0-9._-]+$`. + +`base_url` and `HYPERBROWSER_BASE_URL` accept either `https://host` or `https://host/api`. The client normalizes both to the same control-plane base URL. + ## Usage ### Async