diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 00000000..7a656d6e --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,40 @@ +name: Integration Tests + +on: + merge_group: + types: [checks_requested] + workflow_dispatch: + +concurrency: + group: python-integration-tests-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + integration-tests: + runs-on: depot-ubuntu-24.04-16 + + steps: + - name: Check out repository + uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install Poetry + run: python -m pip install --upgrade pip poetry + + - name: Show tool versions + run: | + python --version + poetry --version + + - name: Install dependencies + run: poetry install --with dev --no-interaction + + - name: Run integration tests + run: poetry run pytest tests/integration diff --git a/README.md b/README.md index 9f62088a..3e9dd8dc 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,12 @@ Both the sync and async client follow similar configuration params ### API Key The API key can be configured either from the constructor arguments or environment variables using `HYPERBROWSER_API_KEY` +If no API key is provided, the client falls back to a saved OAuth session created by `hx auth login`. By default it reads `~/.hx_config/auth/default.json`, or `~/.hx_config/auth/<profile>.json` when `HYPERBROWSER_PROFILE` or `ClientConfig(profile=...)` is set. + +Profile names must match `^[A-Za-z0-9._-]+$`. + +`base_url` and `HYPERBROWSER_BASE_URL` accept either `https://host` or `https://host/api`. The client normalizes both to the same control-plane base URL. 
"""Integration tests for the sync and async clients against a local stub server.

A throwaway ``ThreadingHTTPServer`` bound to an ephemeral loopback port stands
in for the API. Each test points a client at it, starts a scrape job, polls its
status, and then asserts both the parsed responses and the exact requests the
stub server recorded.
"""

import json
import threading
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

import pytest

from hyperbrowser import AsyncHyperbrowser, Hyperbrowser
from hyperbrowser.models.scrape import ScrapeOptions, StartScrapeJobParams


def _read_json_body(handler: BaseHTTPRequestHandler):
    """Return the decoded JSON request body, or ``None`` when there is none.

    The body length is taken from the ``Content-Length`` header; a missing or
    non-positive value is treated as "no body".
    """
    content_length = int(handler.headers.get("Content-Length", "0"))
    if content_length <= 0:
        return None
    return json.loads(handler.rfile.read(content_length).decode("utf-8"))


def _send_json(handler: BaseHTTPRequestHandler, status_code: int, payload: dict) -> None:
    """Write ``payload`` as a JSON response with the given status code."""
    encoded = json.dumps(payload).encode("utf-8")
    handler.send_response(status_code)
    handler.send_header("Content-Type", "application/json")
    handler.send_header("Content-Length", str(len(encoded)))
    handler.end_headers()
    handler.wfile.write(encoded)


def _start_server():
    """Start the stub API server on an ephemeral loopback port.

    Returns:
        A ``(server, base_url, requests)`` tuple. ``base_url`` has no path
        component, and ``requests`` is a list the handler appends one dict to
        per received request (method, path, ``x-api-key``, ``content-type``
        and parsed body).
    """
    requests = []

    class Handler(BaseHTTPRequestHandler):
        def _record(self, body):
            # One entry per request, in arrival order, for the tests to compare.
            requests.append(
                {
                    "method": self.command,
                    "path": self.path,
                    "api_key": self.headers.get("x-api-key"),
                    "content_type": self.headers.get("content-type"),
                    "body": body,
                }
            )

        def _not_found(self):
            _send_json(self, 404, {"message": f"unexpected route {self.path}"})

        def do_POST(self):
            self._record(_read_json_body(self))

            if self.path == "/api/scrape":
                _send_json(self, 200, {"jobId": "job_123"})
                return

            self._not_found()

        def do_GET(self):
            self._record(None)

            if self.path == "/api/scrape/job_123/status":
                _send_json(self, 200, {"status": "completed"})
                return

            self._not_found()

        def log_message(self, format, *args):
            # Silence the default per-request stderr logging during tests.
            return

    server = ThreadingHTTPServer(("127.0.0.1", 0), Handler)
    # Daemon thread so a failed test can never keep the interpreter alive.
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    return server, f"http://127.0.0.1:{server.server_address[1]}", requests


def _stop_server(server) -> None:
    """Stop ``serve_forever`` and release the listening socket."""
    try:
        server.shutdown()
    finally:
        # Close the socket even if shutting down the serve loop fails.
        server.server_close()


def _scrape_params() -> StartScrapeJobParams:
    """Build the scrape request shared by the sync and async tests."""
    return StartScrapeJobParams(
        url="https://example.com",
        scrape_options=ScrapeOptions(formats=["markdown"]),
    )


def _expected_requests():
    """Return the request log both clients are expected to produce."""
    return [
        {
            "method": "POST",
            "path": "/api/scrape",
            "api_key": "test-api-key",
            "content_type": "application/json",
            "body": {
                "url": "https://example.com",
                "scrapeOptions": {
                    "formats": ["markdown"],
                },
            },
        },
        {
            "method": "GET",
            "path": "/api/scrape/job_123/status",
            "api_key": "test-api-key",
            "content_type": None,
            "body": None,
        },
    ]


def test_sync_client_uses_configured_api_endpoint_and_parses_responses():
    server, base_url, requests = _start_server()
    try:
        # Build the client inside the server's try block so that a failure in
        # the constructor or in close() still tears the stub server down.
        client = Hyperbrowser(api_key="test-api-key", base_url=base_url)
        try:
            started = client.scrape.start(_scrape_params())
            status = client.scrape.get_status(started.job_id)
        finally:
            client.close()
    finally:
        _stop_server(server)

    assert started.job_id == "job_123"
    assert status.status == "completed"
    assert requests == _expected_requests()


@pytest.mark.anyio
async def test_async_client_uses_configured_api_endpoint_and_parses_responses():
    server, base_url, requests = _start_server()
    try:
        # Same nesting as the sync test: the server is always stopped, even if
        # creating or closing the client raises.
        client = AsyncHyperbrowser(api_key="test-api-key", base_url=base_url)
        try:
            started = await client.scrape.start(_scrape_params())
            status = await client.scrape.get_status(started.job_id)
        finally:
            await client.close()
    finally:
        _stop_server(server)

    assert started.job_id == "job_123"
    assert status.status == "completed"
    assert requests == _expected_requests()