diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index b7d9d54..5355c05 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -43,7 +43,7 @@ jobs: popd # Start LDAP - source start_LDAP.sh + source continuous_integration/scripts/start_LDAP.sh # These packages are installed in the base environment but may be older # versions. Explicitly upgrade them because they often create diff --git a/bluesky_httpserver/authentication.py b/bluesky_httpserver/_authentication.py similarity index 68% rename from bluesky_httpserver/authentication.py rename to bluesky_httpserver/_authentication.py index 9772974..c1144f5 100644 --- a/bluesky_httpserver/authentication.py +++ b/bluesky_httpserver/_authentication.py @@ -1,5 +1,4 @@ import asyncio -import enum import hashlib import secrets import uuid as uuid_module @@ -7,12 +6,13 @@ from datetime import datetime, timedelta from typing import Optional -from fastapi import APIRouter, Depends, HTTPException, Request, Response, Security, WebSocket +from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request, Response, Security, WebSocket from fastapi.openapi.models import APIKey, APIKeyIn -from fastapi.responses import JSONResponse +from fastapi.responses import HTMLResponse, JSONResponse, RedirectResponse from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm, SecurityScopes from fastapi.security.api_key import APIKeyBase, APIKeyCookie, APIKeyQuery from fastapi.security.utils import get_authorization_scheme_param +from sqlalchemy.exc import IntegrityError # To hide third-party warning # .../jose/backends/cryptography_backend.py:18: CryptographyDeprecationWarning: @@ -34,7 +34,14 @@ from .authorization._defaults import _DEFAULT_ANONYMOUS_PROVIDER_NAME from .core import json_or_msgpack from .database import orm -from .database.core import create_user, latest_principal_activity, lookup_valid_api_key, lookup_valid_session +from .database.core import ( + create_user, + latest_principal_activity, + lookup_valid_api_key, + lookup_valid_pending_session_by_device_code, + lookup_valid_pending_session_by_user_code, + lookup_valid_session, +) from .settings import get_sessionmaker, get_settings from .utils import ( API_KEY_COOKIE_NAME, @@ -49,17 +56,16 @@ ALGORITHM = "HS256" UNIT_SECOND = timedelta(seconds=1) +# Device code flow constants +DEVICE_CODE_MAX_AGE = timedelta(minutes=10) +DEVICE_CODE_POLLING_INTERVAL = 5 # seconds + def utcnow(): "UTC now with second resolution" return datetime.utcnow().replace(microsecond=0) -class Mode(enum.Enum): - password = "password" - external = "external" - - class Token(BaseModel): access_token: str token_type: str @@ -455,7 +461,8 @@ async def auth_code( api_access_manager=Depends(get_api_access_manager), ): request.state.endpoint = "auth" - username = await authenticator.authenticate(request) + user_session_state = await authenticator.authenticate(request) + username = user_session_state.user_name if user_session_state else None if username and api_access_manager.is_user_known(username): scopes = api_access_manager.get_user_scopes(username) @@ -484,7 +491,10 @@ async def handle_credentials( api_access_manager=Depends(get_api_access_manager), ): request.state.endpoint = "auth" - username = await authenticator.authenticate(username=form_data.username, password=form_data.password) + user_session_state = await authenticator.authenticate( + username=form_data.username, password=form_data.password + ) + username = user_session_state.user_name if user_session_state else None err_msg = None if not username: @@ -507,6 +517,437 @@ async def handle_credentials( return handle_credentials +def create_pending_session(db): + """ + Create a pending session for device code flow. + + Returns a dict with 'user_code' (user-facing code) and 'device_code' (for polling). + """ + device_code = secrets.token_bytes(32) + hashed_device_code = hashlib.sha256(device_code).digest() + for _ in range(3): + user_code = secrets.token_hex(4).upper() # 8 digit code + pending_session = orm.PendingSession( + user_code=user_code, + hashed_device_code=hashed_device_code, + expiration_time=utcnow() + DEVICE_CODE_MAX_AGE, + ) + db.add(pending_session) + try: + db.commit() + except IntegrityError: + # Since the user_code is short, we cannot completely dismiss the + # possibility of a collision. Retry. + db.rollback() + continue + break + formatted_user_code = f"{user_code[:4]}-{user_code[4:]}" + return { + "user_code": formatted_user_code, + "device_code": device_code.hex(), + } + + +def build_authorize_route(authenticator, provider): + """Build a GET route that redirects the browser to the OIDC provider for authentication.""" + + async def authorize_redirect( + request: Request, + state: Optional[str] = Query(None), + ): + """Redirect browser to OAuth provider for authentication.""" + redirect_uri = f"{get_base_url(request)}/auth/provider/{provider}/code" + + params = { + "client_id": authenticator.client_id, + "response_type": "code", + "scope": "openid profile email", + "redirect_uri": redirect_uri, + } + if state: + params["state"] = state + + auth_url = authenticator.authorization_endpoint.copy_with(params=params) + return RedirectResponse(url=str(auth_url)) + + return authorize_redirect + + +def build_device_code_authorize_route(authenticator, provider): + """Build a POST route that initiates the device code flow for CLI/headless clients.""" + + async def device_code_authorize( + request: Request, + settings: BaseSettings = Depends(get_settings), + ): + """ + Initiate device code flow. + + Returns authorization_uri for the user to visit in browser, + and device_code + user_code for the CLI client to poll. + """ + request.state.endpoint = "auth" + with get_sessionmaker(settings.database_settings)() as db: + pending_session = create_pending_session(db) + + verification_uri = f"{get_base_url(request)}/auth/provider/{provider}/token" + authorization_uri = authenticator.authorization_endpoint.copy_with( + params={ + "client_id": authenticator.client_id, + "response_type": "code", + "scope": "openid profile email", + "redirect_uri": f"{get_base_url(request)}/auth/provider/{provider}/device_code", + "state": pending_session["user_code"].replace("-", ""), + } + ) + return { + "authorization_uri": str(authorization_uri), # URL that user should visit in browser + "verification_uri": str(verification_uri), # URL that terminal client will poll + "interval": DEVICE_CODE_POLLING_INTERVAL, # suggested polling interval + "device_code": pending_session["device_code"], + "expires_in": int(DEVICE_CODE_MAX_AGE.total_seconds()), # seconds + "user_code": pending_session["user_code"], + } + + return device_code_authorize + + +async def _complete_device_code_authorization( + request: Request, + authenticator, + provider: str, + code: str, + user_code: str, + settings: BaseSettings, + api_access_manager, +): + request.state.endpoint = "auth" + action = f"{get_base_url(request)}/auth/provider/{provider}/device_code?code={code}" + normalized_user_code = user_code.upper().replace("-", "").strip() + + with get_sessionmaker(settings.database_settings)() as db: + pending_session = lookup_valid_pending_session_by_user_code(db, normalized_user_code) + if pending_session is None: + error_html = f""" + + +Error + + + +

Authorization Failed

+
+ Invalid user code. It may have been mistyped, or the pending request may have expired. +
+
Try again + + +""" + return HTMLResponse(content=error_html, status_code=401) + + # Authenticate with the OIDC provider using the authorization code + user_session_state = await authenticator.authenticate(request) + if not user_session_state: + error_html = """ + + +Authentication Failed + + + +

Authentication Failed

+
+ User code was correct but authentication with the identity provider failed. + Please contact the administrator. +
+ + +""" + return HTMLResponse(content=error_html, status_code=401) + + username = user_session_state.user_name + if not api_access_manager.is_user_known(username): + error_html = f""" + + +Authorization Failed + + + +

Authorization Failed

+
User '{username}' is not authorized to access this server.
+ + +""" + return HTMLResponse(content=error_html, status_code=403) + + # Create the session + session = await asyncio.get_running_loop().run_in_executor( + None, _create_session_orm, settings, provider, username, db + ) + + # Link the pending session to the real session + pending_session.session_id = session.id + db.add(pending_session) + db.commit() + + success_html = f""" + + +Success + + + +

Success!

+
+ You have been authenticated. Return to your terminal application - + within {DEVICE_CODE_POLLING_INTERVAL} seconds it should be successfully logged in. +
+ + +""" + return HTMLResponse(content=success_html) + + +def build_device_code_form_route(authenticator, provider): + """Build a GET route that shows the user code entry form.""" + + async def device_code_form( + request: Request, + code: str, + state: Optional[str] = Query(None), + settings: BaseSettings = Depends(get_settings), + api_access_manager=Depends(get_api_access_manager), + ): + """Show form for user to enter user code after browser auth.""" + if state: + return await _complete_device_code_authorization( + request=request, + authenticator=authenticator, + provider=provider, + code=code, + user_code=state, + settings=settings, + api_access_manager=api_access_manager, + ) + + action = f"{get_base_url(request)}/auth/provider/{provider}/device_code?code={code}" + html_content = f""" + + + + Authorize Session + + + +

Authorize Bluesky HTTP Server Session

+
+ + + +
+ +
+ + +""" + return HTMLResponse(content=html_content) + + return device_code_form + + +def build_device_code_submit_route(authenticator, provider): + """Build a POST route that handles user code submission after browser auth.""" + + async def device_code_submit( + request: Request, + code: str = Form(), + user_code: str = Form(), + settings: BaseSettings = Depends(get_settings), + api_access_manager=Depends(get_api_access_manager), + ): + """Handle user code submission and link to authenticated session.""" + return await _complete_device_code_authorization( + request=request, + authenticator=authenticator, + provider=provider, + code=code, + user_code=user_code, + settings=settings, + api_access_manager=api_access_manager, + ) + + return device_code_submit + + +def _create_session_orm(settings, identity_provider, id, db): + """ + Create a session and return the ORM object (for device code flow). + + Unlike create_session(), this returns the ORM object so we can link it + to the pending session. + """ + # Have we seen this Identity before? + identity = ( + db.query(orm.Identity) + .filter(orm.Identity.id == id) + .filter(orm.Identity.provider == identity_provider) + .first() + ) + now = utcnow() + if identity is None: + # We have not. Make a new Principal and link this new Identity to it. + principal = create_user(db, identity_provider, id) + (new_identity,) = principal.identities + new_identity.latest_login = now + else: + identity.latest_login = now + principal = identity.principal + + session = orm.Session( + principal_id=principal.id, + expiration_time=utcnow() + settings.session_max_age, + ) + db.add(session) + db.commit() + db.refresh(session) + return session + + +def build_device_code_token_route(authenticator, provider): + """Build a POST route for the CLI client to poll for tokens.""" + + async def device_code_token( + request: Request, + body: schemas.DeviceCode, + settings: BaseSettings = Depends(get_settings), + api_access_manager=Depends(get_api_access_manager), + ): + """ + Poll for tokens after device code flow authentication. + + Returns tokens if the user has authenticated, or 400 with + 'authorization_pending' error if still waiting. + """ + request.state.endpoint = "auth" + device_code_hex = body.device_code + try: + device_code = bytes.fromhex(device_code_hex) + except Exception: + # Not valid hex, therefore not a valid device_code + raise HTTPException(status_code=401, detail="Invalid device code") + + with get_sessionmaker(settings.database_settings)() as db: + pending_session = lookup_valid_pending_session_by_device_code(db, device_code) + if pending_session is None: + raise HTTPException( + status_code=404, + detail="No such device_code. The pending request may have expired.", + ) + if pending_session.session_id is None: + raise HTTPException(status_code=400, detail={"error": "authorization_pending"}) + + session = pending_session.session + principal = session.principal + + # Get scopes for the user + # Find an identity to get the username + identity = db.query(orm.Identity).filter(orm.Identity.principal_id == principal.id).first() + if identity and api_access_manager.is_user_known(identity.id): + scopes = api_access_manager.get_user_scopes(identity.id) + else: + scopes = set() + + # The pending session can only be used once + db.delete(pending_session) + db.commit() + + # Generate tokens + data = { + "sub": principal.uuid.hex, + "sub_typ": principal.type.value, + "scp": list(scopes), + "ids": [{"id": ident.id, "idp": ident.provider} for ident in principal.identities], + } + access_token = create_access_token( + data=data, + expires_delta=settings.access_token_max_age, + secret_key=settings.secret_keys[0], + ) + refresh_token = create_refresh_token( + session_id=session.uuid.hex, + expires_delta=settings.refresh_token_max_age, + secret_key=settings.secret_keys[0], + ) + + return { + "access_token": access_token, + "expires_in": int(settings.access_token_max_age / UNIT_SECOND), + "refresh_token": refresh_token, + "refresh_token_expires_in": int(settings.refresh_token_max_age / UNIT_SECOND), + "token_type": "bearer", + } + + return device_code_token + + def generate_apikey(db, principal, apikey_params, request, allowed_scopes, source_api_key_scopes): # Use API key scopes if API key is generated based on existing API key, otherwise used allowed scopes if (source_api_key_scopes is not None) and ("inherit" not in source_api_key_scopes): diff --git a/bluesky_httpserver/app.py b/bluesky_httpserver/app.py index f09acb3..0d96667 100644 --- a/bluesky_httpserver/app.py +++ b/bluesky_httpserver/app.py @@ -15,7 +15,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.openapi.utils import get_openapi -from .authentication import Mode +from .authentication import ExternalAuthenticator, InternalAuthenticator from .console_output import CollectPublishedConsoleOutput, ConsoleOutputStream, SystemInfoStream from .core import PatchedStreamingResponse from .database.core import purge_expired @@ -160,6 +160,11 @@ def build_app(authentication=None, api_access=None, resource_access=None, server from .authentication import ( base_authentication_router, build_auth_code_route, + build_authorize_route, + build_device_code_authorize_route, + build_device_code_form_route, + build_device_code_submit_route, + build_device_code_token_route, build_handle_credentials_route, oauth2_scheme, ) @@ -179,20 +184,41 @@ def build_app(authentication=None, api_access=None, resource_access=None, server for spec in authentication["providers"]: provider = spec["provider"] authenticator = spec["authenticator"] - mode = authenticator.mode - if mode == Mode.password: + if isinstance(authenticator, InternalAuthenticator): authentication_router.post(f"/provider/{provider}/token")( build_handle_credentials_route(authenticator, provider) ) - elif mode == Mode.external: + elif isinstance(authenticator, ExternalAuthenticator): + # Standard OAuth callback route (authorization code flow) authentication_router.get(f"/provider/{provider}/code")( build_auth_code_route(authenticator, provider) ) authentication_router.post(f"/provider/{provider}/code")( build_auth_code_route(authenticator, provider) ) + # Device code flow routes for CLI/headless clients + # GET /authorize - redirects browser to OIDC provider + authentication_router.get(f"/provider/{provider}/authorize")( + build_authorize_route(authenticator, provider) + ) + # POST /authorize - initiates device code flow (returns device_code, user_code, etc.) + authentication_router.post(f"/provider/{provider}/authorize")( + build_device_code_authorize_route(authenticator, provider) + ) + # GET /device_code - shows user code entry form + authentication_router.get(f"/provider/{provider}/device_code")( + build_device_code_form_route(authenticator, provider) + ) + # POST /device_code - handles user code submission after browser auth + authentication_router.post(f"/provider/{provider}/device_code")( + build_device_code_submit_route(authenticator, provider) + ) + # POST /token - CLI client polls this for tokens + authentication_router.post(f"/provider/{provider}/token")( + build_device_code_token_route(authenticator, provider) + ) else: - raise ValueError(f"unknown authentication mode {mode}") + raise ValueError(f"unknown authenticator type {type(authenticator)}") for custom_router in getattr(authenticator, "include_routers", []): authentication_router.include_router(custom_router, prefix=f"/provider/{provider}") diff --git a/bluesky_httpserver/authentication/__init__.py b/bluesky_httpserver/authentication/__init__.py new file mode 100644 index 0000000..85d835e --- /dev/null +++ b/bluesky_httpserver/authentication/__init__.py @@ -0,0 +1,35 @@ +from .._authentication import ( + base_authentication_router, + build_auth_code_route, + build_authorize_route, + build_device_code_authorize_route, + build_device_code_form_route, + build_device_code_submit_route, + build_device_code_token_route, + build_handle_credentials_route, + get_current_principal, + get_current_principal_websocket, + oauth2_scheme, +) +from .authenticator_base import ( + ExternalAuthenticator, + InternalAuthenticator, + UserSessionState, +) + +__all__ = [ + "ExternalAuthenticator", + "InternalAuthenticator", + "UserSessionState", + "get_current_principal", + "get_current_principal_websocket", + "base_authentication_router", + "build_auth_code_route", + "build_authorize_route", + "build_device_code_authorize_route", + "build_device_code_form_route", + "build_device_code_submit_route", + "build_device_code_token_route", + "build_handle_credentials_route", + "oauth2_scheme", +] diff --git a/bluesky_httpserver/authentication/authenticator_base.py b/bluesky_httpserver/authentication/authenticator_base.py new file mode 100644 index 0000000..af103c5 --- /dev/null +++ b/bluesky_httpserver/authentication/authenticator_base.py @@ -0,0 +1,37 @@ +from abc import ABC +from dataclasses import dataclass +from typing import Optional + +from fastapi import Request + + +@dataclass +class UserSessionState: + """Data transfer class to communicate custom session state information.""" + + user_name: str + state: dict = None + + +class InternalAuthenticator(ABC): + """ + Base class for authenticators that use username/password credentials. + + Subclasses must implement the authenticate method which takes a username + and password and returns a UserSessionState on success or None on failure. + """ + + async def authenticate(self, username: str, password: str) -> Optional[UserSessionState]: + raise NotImplementedError + + +class ExternalAuthenticator(ABC): + """ + Base class for authenticators that use external identity providers. + + Subclasses must implement the authenticate method which takes a FastAPI + Request object and returns a UserSessionState on success or None on failure. + """ + + async def authenticate(self, request: Request) -> Optional[UserSessionState]: + raise NotImplementedError diff --git a/bluesky_httpserver/authenticators.py b/bluesky_httpserver/authenticators.py index 61c2da4..a58fedf 100644 --- a/bluesky_httpserver/authenticators.py +++ b/bluesky_httpserver/authenticators.py @@ -1,21 +1,32 @@ import asyncio +import base64 import functools import logging import re import secrets from collections.abc import Iterable +from datetime import timedelta +from typing import Any, List, Mapping, Optional, cast +import httpx +from cachetools import TTLCache, cached from fastapi import APIRouter, Request -from jose import JWTError, jwk, jwt +from fastapi.security import OAuth2, OAuth2AuthorizationCodeBearer +from jose import JWTError, jwt +from pydantic import Secret from starlette.responses import RedirectResponse -from .authentication import Mode -from .utils import modules_available +from .authentication import ( + ExternalAuthenticator, + InternalAuthenticator, + UserSessionState, +) +from .utils import get_root_url, modules_available logger = logging.getLogger(__name__) -class DummyAuthenticator: +class DummyAuthenticator(InternalAuthenticator): """ For test and demo purposes only! @@ -23,26 +34,20 @@ class DummyAuthenticator: """ - mode = Mode.password + def __init__(self, confirmation_message: str = ""): + self.confirmation_message = confirmation_message - async def authenticate(self, username: str, password: str): - return username + async def authenticate(self, username: str, password: str) -> UserSessionState: + return UserSessionState(username, {}) -class DictionaryAuthenticator: +class DictionaryAuthenticator(InternalAuthenticator): """ For test and demo purposes only! Check passwords from a dictionary of usernames mapped to passwords. - - Parameters - ---------- - - users_to_passwords: dict(str, str) - Mapping of usernames to passwords. """ - mode = Mode.password configuration_schema = """ $schema": http://json-schema.org/draft-07/schema# type: object @@ -50,25 +55,28 @@ class DictionaryAuthenticator: properties: users_to_password: type: object - description: | - Mapping usernames to password. Environment variable expansion should be - used to avoid placing passwords directly in configuration. + description: | + Mapping usernames to password. Environment variable expansion should be + used to avoid placing passwords directly in configuration. + confirmation_message: + type: string + description: May be displayed by client after successful login. """ - def __init__(self, users_to_passwords): + def __init__(self, users_to_passwords: Mapping[str, str], confirmation_message: str = ""): self._users_to_passwords = users_to_passwords + self.confirmation_message = confirmation_message - async def authenticate(self, username: str, password: str): + async def authenticate(self, username: str, password: str) -> Optional[UserSessionState]: true_password = self._users_to_passwords.get(username) if not true_password: # Username is not valid. - return + return None if secrets.compare_digest(true_password, password): - return username + return UserSessionState(username, {}) -class PAMAuthenticator: - mode = Mode.password +class PAMAuthenticator(InternalAuthenticator): configuration_schema = """ $schema": http://json-schema.org/draft-07/schema# type: object @@ -77,154 +85,244 @@ class PAMAuthenticator: service: type: string description: PAM service. Default is 'login'. + confirmation_message: + type: string + description: May be displayed by client after successful login. """ - def __init__(self, service="login"): + def __init__(self, service: str = "login", confirmation_message: str = ""): if not modules_available("pamela"): raise ModuleNotFoundError("This PAMAuthenticator requires the module 'pamela' to be installed.") self.service = service + self.confirmation_message = confirmation_message # TODO Try to open a PAM session. - async def authenticate(self, username: str, password: str): + async def authenticate(self, username: str, password: str) -> Optional[UserSessionState]: import pamela try: pamela.authenticate(username, password, service=self.service) + return UserSessionState(username, {}) except pamela.PAMError: # Authentication failed. - return - else: - return username + return None -class OIDCAuthenticator: - mode = Mode.external +class OIDCAuthenticator(ExternalAuthenticator): configuration_schema = """ $schema": http://json-schema.org/draft-07/schema# type: object additionalProperties: false properties: + audience: + type: string client_id: type: string client_secret: type: string - redirect_uri: + well_known_uri: type: string - token_uri: + confirmation_message: type: string - authorization_endpoint: + redirect_on_success: + type: string + redirect_on_failure: type: string - public_keys: - type: array - item: - type: object - properties: - - alg: - type: string - - e - type: string - - kid - type: string - - kty - type: string - - n - type: string - - use - type: string - required: - - alg - - e - - kid - - kty - - n - - use """ def __init__( self, - client_id, - client_secret, - redirect_uri, - public_keys, - token_uri, - authorization_endpoint, - confirmation_message, + audience: str, + client_id: str, + client_secret: str, + well_known_uri: str, + confirmation_message: str = "", + redirect_on_success: Optional[str] = None, + redirect_on_failure: Optional[str] = None, ): - self.client_id = client_id - self.client_secret = client_secret + self._audience = audience + self._client_id = client_id + self._client_secret = Secret(client_secret) + self._well_known_url = well_known_uri self.confirmation_message = confirmation_message - self.redirect_uri = redirect_uri - self.public_keys = public_keys - self.token_uri = token_uri - self.authorization_endpoint = authorization_endpoint.format(client_id=client_id, redirect_uri=redirect_uri) - - async def authenticate(self, request): - code = request.query_params["code"] - response = await exchange_code(self.token_uri, code, self.client_id, self.client_secret, self.redirect_uri) + self.redirect_on_success = redirect_on_success + self.redirect_on_failure = redirect_on_failure + + @functools.cached_property + def _config_from_oidc_url(self) -> dict[str, Any]: + response: httpx.Response = httpx.get(self._well_known_url) + response.raise_for_status() + return response.json() + + @functools.cached_property + def client_id(self) -> str: + return self._client_id + + @functools.cached_property + def id_token_signing_alg_values_supported(self) -> list[str]: + return cast( + list[str], + self._config_from_oidc_url.get("id_token_signing_alg_values_supported"), + ) + + @functools.cached_property + def issuer(self) -> str: + return cast(str, self._config_from_oidc_url.get("issuer")) + + @functools.cached_property + def jwks_uri(self) -> str: + return cast(str, self._config_from_oidc_url.get("jwks_uri")) + + @functools.cached_property + def token_endpoint(self) -> str: + return cast(str, self._config_from_oidc_url.get("token_endpoint")) + + @functools.cached_property + def authorization_endpoint(self) -> httpx.URL: + return httpx.URL(cast(str, self._config_from_oidc_url.get("authorization_endpoint"))) + + @functools.cached_property + def device_authorization_endpoint(self) -> str: + return cast(str, self._config_from_oidc_url.get("device_authorization_endpoint")) + + @functools.cached_property + def end_session_endpoint(self) -> str: + return cast(str, self._config_from_oidc_url.get("end_session_endpoint")) + + @cached(TTLCache(maxsize=1, ttl=timedelta(days=7).total_seconds())) + def keys(self) -> List[str]: + return httpx.get(self.jwks_uri).raise_for_status().json().get("keys", []) + + def decode_token(self, token: str) -> dict[str, Any]: + return jwt.decode( + token, + key=self.keys(), + algorithms=self.id_token_signing_alg_values_supported, + audience=self._audience, + issuer=self.issuer, + ) + + async def authenticate(self, request: Request) -> Optional[UserSessionState]: + code = request.query_params.get("code") + if not code: + logger.warning("Authentication failed: No authorization code parameter provided.") + return None + # A proxy in the middle may make the request into something like + # 'http://localhost:8000/...' so we fix the first part but keep + # the original URI path. + redirect_uri = f"{get_root_url(request)}{request.url.path}" + response = await exchange_code( + self.token_endpoint, + code, + self._client_id, + self._client_secret.get_secret_value(), + redirect_uri, + ) response_body = response.json() if response.is_error: logger.error("Authentication error: %r", response_body) return None - response_body = response.json() id_token = response_body["id_token"] - access_token = response_body["access_token"] - # Match the kid in id_token to a key in the list of public_keys. - key = find_key(id_token, self.public_keys) + # NOTE: We decode the id_token, not access_token, because: + # 1. The id_token is the OIDC identity assertion meant for the client + # 2. Some providers (like Microsoft Entra) return opaque access_tokens + # that cannot be decoded with the JWKS keys when the resource is + # a first-party Microsoft API (e.g., Graph API with User.Read scope) try: - verified_body = jwt.decode(id_token, key, access_token=access_token, audience=self.client_id) + verified_body = self.decode_token(id_token) except JWTError: logger.exception( "Authentication error. Unverified token: %r", jwt.get_unverified_claims(id_token), ) return None - return verified_body["sub"] - - -class KeyNotFoundError(Exception): - pass - - -def find_key(token, keys): - """ - Find a key from the configured keys based on the kid claim of the token - - Parameters - ---------- - token : token to search for the kid from - keys: list of keys + # Use preferred_username as the user identifier, extracting just the username + # part if it's in email format (user@domain.com -> user) + preferred_username = verified_body.get("preferred_username") + if preferred_username and "@" in preferred_username: + user_id = preferred_username.split("@")[0] + elif preferred_username: + user_id = preferred_username + else: + user_id = verified_body["sub"] + logger.info( + "OIDC authentication successful. user_id=%r (sub=%r, preferred_username=%r, email=%r, name=%r)", + user_id, + verified_body.get("sub"), + verified_body.get("preferred_username"), + verified_body.get("email"), + verified_body.get("name"), + ) + return UserSessionState(user_id, {}) - Raises - ------ - KeyNotFoundError: - returned if the token does not have a kid claim - Returns - ------ - key: found key object - """ +class ProxiedOIDCAuthenticator(OIDCAuthenticator): + configuration_schema = """ +$schema": http://json-schema.org/draft-07/schema# +type: object +additionalProperties: false +properties: + audience: + type: string + client_id: + type: string + well_known_uri: + type: string + scopes: + type: array + items: + type: string + description: | + Optional list of OAuth2 scopes to request. If provided, authorization + should be enforced by an external policy agent (for example ExternalPolicyDecisionPoint) + rather than by this authenticator. + device_flow_client_id: + type: string + confirmation_message: + type: string +""" - unverified = jwt.get_unverified_header(token) - kid = unverified.get("kid") - if not kid: - raise KeyNotFoundError("No 'kid' in token") + def __init__( + self, + audience: str, + client_id: str, + well_known_uri: str, + device_flow_client_id: str, + scopes: Optional[List[str]] = None, + confirmation_message: str = "", + ): + super().__init__( + audience=audience, + client_id=client_id, + client_secret="", + well_known_uri=well_known_uri, + confirmation_message=confirmation_message, + ) + self.scopes = scopes + self.device_flow_client_id = device_flow_client_id + self._oidc_bearer = OAuth2AuthorizationCodeBearer( + authorizationUrl=str(self.authorization_endpoint), + tokenUrl=self.token_endpoint, + ) - for key in keys: - if key["kid"] == kid: - return jwk.construct(key) - return KeyNotFoundError(f"Token specifies {kid} but we have {[k['kid'] for k in keys]}") + @property + def oauth2_schema(self) -> OAuth2: + return self._oidc_bearer -async def exchange_code(token_uri, auth_code, client_id, client_secret, redirect_uri): +async def exchange_code( + token_uri: str, + auth_code: str, + client_id: str, + client_secret: str, + redirect_uri: str, +) -> httpx.Response: """Method that talks to an IdP to exchange a code for an access_token and/or id_token Args: token_url ([type]): [description] auth_code ([type]): [description] """ - if not modules_available("httpx"): - raise ModuleNotFoundError("This authenticator requires 'httpx'. (pip install httpx)") - import httpx - + auth_value = base64.b64encode(f"{client_id}:{client_secret}".encode()).decode() response = httpx.post( url=token_uri, data={ @@ -234,18 +332,18 @@ async def exchange_code(token_uri, auth_code, client_id, client_secret, redirect "code": auth_code, "client_secret": client_secret, }, + headers={"Authorization": f"Basic {auth_value}"}, ) return response -class SAMLAuthenticator: - mode = Mode.external +class SAMLAuthenticator(ExternalAuthenticator): def __init__( self, saml_settings, # See EXAMPLE_SAML_SETTINGS below. - attribute_name, # which SAML attribute to use as 'id' for Idenity - confirmation_message=None, + attribute_name: str, # which SAML attribute to use as 'id' for Identity + confirmation_message: str = "", ): self.saml_settings = saml_settings self.attribute_name = attribute_name @@ -263,23 +361,15 @@ def __init__( from onelogin.saml2.auth import OneLogin_Saml2_Auth @router.get("/login") - async def saml_login(request: Request): + async def saml_login(request: Request) -> RedirectResponse: req = await prepare_saml_from_fastapi_request(request) auth = OneLogin_Saml2_Auth(req, self.saml_settings) - # saml_settings = auth.get_settings() - # metadata = saml_settings.get_sp_metadata() - # errors = saml_settings.validate_metadata(metadata) - # if len(errors) == 0: - # print(metadata) - # else: - # print("Error found on Metadata: %s" % (', '.join(errors))) callback_url = auth.login() - response = RedirectResponse(url=callback_url) - return response + return RedirectResponse(url=callback_url) self.include_routers = [router] - async def authenticate(self, request): + async def authenticate(self, request: Request) -> Optional[UserSessionState]: if not modules_available("onelogin"): raise ModuleNotFoundError("This SAMLAuthenticator requires the module 'oneline' to be installed.") from onelogin.saml2.auth import OneLogin_Saml2_Auth @@ -297,12 +387,12 @@ async def authenticate(self, request): attribute_as_list = auth.get_attributes()[self.attribute_name] # Confused in what situation this would have more than one item.... assert len(attribute_as_list) == 1 - return attribute_as_list[0] + return UserSessionState(attribute_as_list[0], {}) else: return None -async def prepare_saml_from_fastapi_request(request, debug=False): +async def prepare_saml_from_fastapi_request(request: Request) -> Mapping[str, str]: form_data = await request.form() rv = { "http_host": request.client.host, @@ -328,7 +418,7 @@ async def prepare_saml_from_fastapi_request(request, debug=False): return rv -class LDAPAuthenticator: +class LDAPAuthenticator(InternalAuthenticator): """ LDAP authenticator. The authenticator code is based on https://github.com/jupyterhub/ldapauthenticator @@ -472,6 +562,8 @@ class LDAPAuthenticator: This can be useful in an heterogeneous environment, when supplying a UNIX username to authenticate against AD. + confirmation_message: str + May be displayed by client after successful login. Examples -------- @@ -510,8 +602,6 @@ class LDAPAuthenticator: id: user02 """ - mode = Mode.password - def __init__( self, server_address, @@ -536,6 +626,7 @@ def __init__( attributes=None, auth_state_attributes=None, use_lookup_dn_username=True, + confirmation_message="", ): self.use_ssl = use_ssl self.use_tls = use_tls @@ -571,6 +662,7 @@ def __init__( self.server_address_list = server_address_list self.server_port = server_port if server_port is not None else self._server_port_default() + self.confirmation_message = confirmation_message def _server_port_default(self): if self.use_ssl: @@ -655,7 +747,7 @@ async def resolve_username(self, username_supplied_by_user): def get_connection(self, userdn, password): import ldap3 - # NOTE: setting 'acitve=False' essentially disables exclusion of inactive servers from the pool. + # NOTE: setting 'active=False' essentially disables exclusion of inactive servers from the pool. # It probably does not matter if the pool contains only one server, but it could have implications # when there are multiple servers in the pool. It is not clear what those implications are. # But using the default 'activate=True' results in the thread being blocked indefinitely @@ -675,14 +767,21 @@ def get_connection(self, userdn, password): server_port = self.server_port server = ldap3.Server( - server_addr, port=server_port, use_ssl=self.use_ssl, connect_timeout=self.connect_timeout + server_addr, + port=server_port, + use_ssl=self.use_ssl, + connect_timeout=self.connect_timeout, ) server_pool.add(server) auto_bind_no_ssl = ldap3.AUTO_BIND_TLS_BEFORE_BIND if self.use_tls else ldap3.AUTO_BIND_NO_TLS auto_bind = ldap3.AUTO_BIND_NO_TLS if self.use_ssl else auto_bind_no_ssl conn = ldap3.Connection( - server_pool, user=userdn, password=password, auto_bind=auto_bind, receive_timeout=self.receive_timeout + server_pool, + user=userdn, + password=password, + auto_bind=auto_bind, + receive_timeout=self.receive_timeout, ) return conn @@ -690,14 +789,17 @@ async def get_user_attributes(self, conn, userdn): attrs = {} if self.auth_state_attributes: search_func = functools.partial( - conn.search, userdn, "(objectClass=*)", attributes=self.auth_state_attributes + conn.search, + userdn, + "(objectClass=*)", + attributes=self.auth_state_attributes, ) found = await asyncio.get_running_loop().run_in_executor(None, search_func) if found: attrs = conn.entries[0].entry_attributes_as_dict return attrs - async def authenticate(self, username: str, password: str): + async def authenticate(self, username: str, password: str) -> Optional[UserSessionState]: import ldap3 username_saved = username # Save the user name passed as a parameter @@ -826,5 +928,6 @@ async def authenticate(self, username: str, password: str): user_info = await self.get_user_attributes(conn, userdn) if user_info: logger.debug("username:%s attributes:%s", username, user_info) - return {"name": username, "auth_state": user_info} - return username + # this path might never have been worked out...is it ever hit? + return UserSessionState(username, user_info) + return UserSessionState(username, {}) diff --git a/bluesky_httpserver/config_schemas/service_configuration.yml b/bluesky_httpserver/config_schemas/service_configuration.yml index 57343f7..a76e4d3 100644 --- a/bluesky_httpserver/config_schemas/service_configuration.yml +++ b/bluesky_httpserver/config_schemas/service_configuration.yml @@ -83,7 +83,7 @@ properties: description: | Type of Authenticator to use. - These are typically from the tiled.authenticators module, + These are typically from the bluesky_httpserver.authenticators module, though user-defined ones may be used as well. This is given as an import path. In an import path, packages/modules @@ -92,36 +92,21 @@ properties: Example: ```yaml - authenticator: bluesky_httpserver.examples.DummyAuthenticator + authenticator: bluesky_httpserver.authenticators:DummyAuthenticator ``` - args: - type: [object, "null"] - description: | - Named arguments to pass to Authenticator. If there are none, - `args` may be omitted or empty. + args: + type: object + description: | + Named arguments to pass to Authenticator. If there are none, + `args` may be omitted or empty. - Example: + Example: - ```yaml - authenticator: bluesky_httpserver.examples.PAMAuthenticator - args: - service: "custom_service" - ``` - # qserver_admins: - # type: array - # items: - # type: object - # additionalProperties: false - # required: - # - provider - # - id - # properties: - # provider: - # type: string - # id: - # type: string - # description: | - # Give users with these identities 'admin' Role. + ```yaml + authenticator: bluesky_httpserver.authenticators:PAMAuthenticator + args: + service: "custom_service" + ``` secret_keys: type: array items: diff --git a/bluesky_httpserver/database/core.py b/bluesky_httpserver/database/core.py index 163fac3..52d102f 100644 --- a/bluesky_httpserver/database/core.py +++ b/bluesky_httpserver/database/core.py @@ -1,6 +1,7 @@ import hashlib import uuid as uuid_module from datetime import datetime +from typing import Optional from alembic import command from alembic.config import Config @@ -10,13 +11,13 @@ from .alembic_utils import temp_alembic_ini from .base import Base -from .orm import APIKey, Identity, Principal, Session # , Role +from .orm import APIKey, Identity, PendingSession, Principal, Session # , Role # This is the alembic revision ID of the database revision # required by this version of Tiled. -REQUIRED_REVISION = "722ff4e4fcc7" +REQUIRED_REVISION = "a1b2c3d4e5f6" # This is list of all valid revisions (from current to oldest). -ALL_REVISIONS = ["722ff4e4fcc7", "481830dd6c11"] +ALL_REVISIONS = ["a1b2c3d4e5f6", "722ff4e4fcc7", "481830dd6c11"] # def create_default_roles(engine): @@ -294,3 +295,38 @@ def latest_principal_activity(db, principal): if all([t is None for t in all_activity]): return None return max(t for t in all_activity if t is not None) + + +def lookup_valid_pending_session_by_device_code(db, device_code: bytes) -> Optional[PendingSession]: + """ + Look up a pending session by its device code. + + Returns None if the pending session is not found or has expired. + """ + hashed_device_code = hashlib.sha256(device_code).digest() + pending_session = ( + db.query(PendingSession).filter(PendingSession.hashed_device_code == hashed_device_code).first() + ) + if pending_session is None: + return None + if pending_session.expiration_time is not None and pending_session.expiration_time < datetime.utcnow(): + db.delete(pending_session) + db.commit() + return None + return pending_session + + +def lookup_valid_pending_session_by_user_code(db, user_code: str) -> Optional[PendingSession]: + """ + Look up a pending session by its user code. + + Returns None if the pending session is not found or has expired. + """ + pending_session = db.query(PendingSession).filter(PendingSession.user_code == user_code).first() + if pending_session is None: + return None + if pending_session.expiration_time is not None and pending_session.expiration_time < datetime.utcnow(): + db.delete(pending_session) + db.commit() + return None + return pending_session diff --git a/bluesky_httpserver/database/orm.py b/bluesky_httpserver/database/orm.py index 17d7c82..7611824 100644 --- a/bluesky_httpserver/database/orm.py +++ b/bluesky_httpserver/database/orm.py @@ -181,3 +181,24 @@ class Session(Timestamped, Base): revoked = Column(Boolean, default=False, nullable=False) principal = relationship("Principal", back_populates="sessions") + pending_sessions = relationship("PendingSession", back_populates="session") + + +class PendingSession(Timestamped, Base): + """ + This is used only in Device Code Flow for OIDC authentication. + + When a CLI client initiates the device code flow, a pending session is created + with a device_code (for the client to poll) and a user_code (for the user to + enter in the browser). Once the user authenticates, the pending session is + linked to a real session, which the polling client then receives. + """ + + __tablename__ = "pending_sessions" + + hashed_device_code = Column(LargeBinary(32), primary_key=True, index=True, nullable=False) + user_code = Column(Unicode(8), index=True, nullable=False) + expiration_time = Column(DateTime(timezone=False), nullable=False) + session_id = Column(Integer, ForeignKey("sessions.id"), nullable=True) + + session = relationship("Session", back_populates="pending_sessions") diff --git a/bluesky_httpserver/schemas.py b/bluesky_httpserver/schemas.py index c52d8f2..f1d9fcb 100644 --- a/bluesky_httpserver/schemas.py +++ b/bluesky_httpserver/schemas.py @@ -163,6 +163,23 @@ class RefreshToken(pydantic.BaseModel): refresh_token: str +class DeviceCode(pydantic.BaseModel): + """Schema for device code token polling request.""" + + device_code: str + + +class DeviceCodeResponse(pydantic.BaseModel): + """Schema for device code flow initiation response.""" + + authorization_uri: str + verification_uri: str + device_code: str + user_code: str + expires_in: int + interval: int + + class AuthenticationMode(str, enum.Enum): password = "password" external = "external" diff --git a/bluesky_httpserver/tests/conftest.py b/bluesky_httpserver/tests/conftest.py index ec69415..d5cafdb 100644 --- a/bluesky_httpserver/tests/conftest.py +++ b/bluesky_httpserver/tests/conftest.py @@ -18,6 +18,22 @@ _user_group = "primary" +def _wait_for_http_server_ready(*, timeout=10, request_prefix="/api"): + """Wait until HTTP server accepts connections and responds to /status.""" + t_stop = ttime.time() + timeout + url = f"http://{SERVER_ADDRESS}:{SERVER_PORT}{request_prefix}/status" + while ttime.time() < t_stop: + try: + response = requests.get(url, timeout=0.5) + # Any HTTP response means the server is up (auth may still reject request). + if response.status_code: + return + except requests.RequestException: + pass + ttime.sleep(0.1) + raise TimeoutError(f"HTTP server is not ready after {timeout} s: {url}") + + @pytest.fixture(scope="module") def fastapi_server(xprocess): class Starter(ProcessStarter): @@ -29,6 +45,7 @@ class Starter(ProcessStarter): # args = f"start-bluesky-httpserver --host={SERVER_ADDRESS} --port {SERVER_PORT}".split() xprocess.ensure("fastapi_server", Starter) + _wait_for_http_server_ready() yield @@ -55,7 +72,7 @@ class Starter(ProcessStarter): args = f"uvicorn --host={http_server_host} --port {http_server_port} {bqss.__name__}:app".split() xprocess.ensure("fastapi_server", Starter) - ttime.sleep(1) + _wait_for_http_server_ready() yield start @@ -195,3 +212,28 @@ def wait_for_ip_kernel_idle(timeout, polling_period=0.2, api_key=API_KEY_FOR_TES return True return False + + +# ============================================================================ +# OIDC Test Fixtures +# ============================================================================ + + +@pytest.fixture +def oidc_base_url() -> str: + """Base URL for mock OIDC provider.""" + return "https://example.com/realms/example/" + + +@pytest.fixture +def well_known_response(oidc_base_url: str) -> dict: + """Mock OIDC well-known configuration response.""" + return { + "id_token_signing_alg_values_supported": ["RS256"], + "issuer": oidc_base_url.rstrip("/"), + "jwks_uri": f"{oidc_base_url}protocol/openid-connect/certs", + "authorization_endpoint": f"{oidc_base_url}protocol/openid-connect/auth", + "token_endpoint": f"{oidc_base_url}protocol/openid-connect/token", + "device_authorization_endpoint": f"{oidc_base_url}protocol/openid-connect/auth/device", + "end_session_endpoint": f"{oidc_base_url}protocol/openid-connect/logout", + } diff --git a/bluesky_httpserver/tests/test_authenticators.py b/bluesky_httpserver/tests/test_authenticators.py index cc2984c..53c6bbe 100644 --- a/bluesky_httpserver/tests/test_authenticators.py +++ b/bluesky_httpserver/tests/test_authenticators.py @@ -1,9 +1,17 @@ import asyncio +import time +from typing import Any, Tuple +import httpx import pytest +from cryptography.hazmat.primitives.asymmetric import rsa +from jose import ExpiredSignatureError, jwt +from jose.backends import RSAKey +from respx import MockRouter +from starlette.datastructures import URL, QueryParams # fmt: off -from ..authenticators import LDAPAuthenticator +from ..authenticators import LDAPAuthenticator, OIDCAuthenticator, ProxiedOIDCAuthenticator, UserSessionState @pytest.mark.parametrize("ldap_server_address, ldap_server_port", [ @@ -35,9 +43,244 @@ def test_LDAPAuthenticator_01(use_tls, use_ssl, ldap_server_address, ldap_server ) async def testing(): - assert await authenticator.authenticate("user01", "password1") == "user01" - assert await authenticator.authenticate("user02", "password2") == "user02" + assert await authenticator.authenticate("user01", "password1") == UserSessionState("user01", {}) + assert await authenticator.authenticate("user02", "password2") == UserSessionState("user02", {}) assert await authenticator.authenticate("user02a", "password2") is None assert await authenticator.authenticate("user02", "password2a") is None asyncio.run(testing()) + + +@pytest.fixture +def oidc_well_known_url(oidc_base_url: str) -> str: + return f"{oidc_base_url}.well-known/openid-configuration" + + +@pytest.fixture +def keys() -> Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]: + private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) + public_key = private_key.public_key() + return (private_key, public_key) + + +@pytest.fixture +def json_web_keyset(keys: Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]) -> list[dict[str, Any]]: + _, public_key = keys + return [RSAKey(key=public_key, algorithm="RS256").to_dict()] + + +@pytest.fixture +def mock_oidc_server( + respx_mock: MockRouter, + oidc_well_known_url: str, + well_known_response: dict[str, Any], + json_web_keyset: list[dict[str, Any]], +) -> MockRouter: + respx_mock.get(oidc_well_known_url).mock(return_value=httpx.Response(httpx.codes.OK, json=well_known_response)) + respx_mock.get(well_known_response["jwks_uri"]).mock( + return_value=httpx.Response(httpx.codes.OK, json={"keys": json_web_keyset}) + ) + return respx_mock + + +def token(issued: bool, expired: bool) -> dict[str, str]: + now = time.time() + return { + "aud": "tiled", + "exp": (now - 1500) if expired else (now + 1500), + "iat": (now - 1500) if issued else (now + 1500), + "iss": "https://example.com/realms/example", + "sub": "Jane Doe", + } + + +def encrypted_token(token_data: dict[str, str], private_key: rsa.RSAPrivateKey) -> str: + return jwt.encode( + token_data, + key=private_key, + algorithm="RS256", + headers={"kid": "secret"}, + ) + + +def test_oidc_authenticator_caching( + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + well_known_response: dict[str, Any], + json_web_keyset: list[dict[str, Any]], +): + authenticator = OIDCAuthenticator("tiled", "tiled", "secret", well_known_uri=oidc_well_known_url) + assert authenticator.client_id == "tiled" + assert authenticator.authorization_endpoint == well_known_response["authorization_endpoint"] + assert authenticator.id_token_signing_alg_values_supported == well_known_response[ + "id_token_signing_alg_values_supported" + ] + assert authenticator.issuer == well_known_response["issuer"] + assert authenticator.jwks_uri == well_known_response["jwks_uri"] + assert authenticator.token_endpoint == well_known_response["token_endpoint"] + assert authenticator.device_authorization_endpoint == well_known_response["device_authorization_endpoint"] + assert authenticator.end_session_endpoint == well_known_response["end_session_endpoint"] + + assert len(mock_oidc_server.calls) == 1 + call_request = mock_oidc_server.calls[0].request + assert call_request.method == "GET" + assert call_request.url == oidc_well_known_url + + assert authenticator.keys() == json_web_keyset + assert len(mock_oidc_server.calls) == 2 + keys_request = mock_oidc_server.calls[1].request + assert keys_request.method == "GET" + assert keys_request.url == well_known_response["jwks_uri"] + + for _ in range(10): + assert authenticator.keys() == json_web_keyset + + assert len(mock_oidc_server.calls) == 2 + + +@pytest.mark.parametrize("issued", [True, False]) +@pytest.mark.parametrize("expired", [True, False]) +def test_oidc_decoding( + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + issued: bool, + expired: bool, + keys: Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey], +): + private_key, _ = keys + authenticator = OIDCAuthenticator("tiled", "tiled", "secret", well_known_uri=oidc_well_known_url) + access_token = token(issued, expired) + encrypted_access_token = encrypted_token(access_token, private_key) + + if not expired: + assert authenticator.decode_token(encrypted_access_token) == access_token + else: + with pytest.raises(ExpiredSignatureError): + authenticator.decode_token(encrypted_access_token) + + +@pytest.mark.asyncio +async def test_proxied_oidc_token_retrieval(oidc_well_known_url: str, mock_oidc_server: MockRouter): + authenticator = ProxiedOIDCAuthenticator("tiled", "tiled", oidc_well_known_url, + device_flow_client_id="tiled-cli") + test_request = httpx.Request("GET", "http://example.com", headers={"Authorization": "bearer FOO"}) + + assert "FOO" == await authenticator.oauth2_schema(test_request) + + +def create_mock_oidc_request(query_params=None): + if query_params is None: + query_params = {} + + class MockRequest: + def __init__(self, request_query_params): + self.query_params = QueryParams(request_query_params) + self.scope = { + "type": "http", + "scheme": "http", + "server": ("localhost", 8000), + "path": "/api/v1/auth/provider/orcid/code", + "headers": [], + } + self.headers = {"host": "localhost:8000"} + self.url = URL("http://localhost:8000/api/v1/auth/provider/orcid/code") + + return MockRequest(query_params) + + +@pytest.mark.asyncio +async def test_OIDCAuthenticator_mock( + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + well_known_response: dict[str, Any], + monkeypatch, +): + mock_jwt_payload = { + "sub": "0009-0008-8698-7745", + "aud": "APP-TEST-CLIENT-ID", + "iss": well_known_response["issuer"], + "exp": 9999999999, + "iat": 1000000000, + "given_name": "Test User", + } + + mock_oidc_server.post(well_known_response["token_endpoint"]).mock( + return_value=httpx.Response( + 200, + json={ + "access_token": "mock-access-token", + "id_token": "mock-id-token", + "token_type": "bearer", + }, + ) + ) + + authenticator = OIDCAuthenticator( + audience="APP-TEST-CLIENT-ID", + client_id="APP-TEST-CLIENT-ID", + client_secret="test-secret", + well_known_uri=oidc_well_known_url, + ) + + mock_request = create_mock_oidc_request({"code": "test-auth-code"}) + + def mock_jwt_decode(*args, **kwargs): + return mock_jwt_payload + + def mock_jwk_construct(*args, **kwargs): + class MockJWK: + pass + + return MockJWK() + + monkeypatch.setattr("jose.jwt.decode", mock_jwt_decode) + monkeypatch.setattr("jose.jwk.construct", mock_jwk_construct) + + user_session = await authenticator.authenticate(mock_request) + + assert user_session is not None + assert user_session.user_name == "0009-0008-8698-7745" + + +@pytest.mark.asyncio +async def test_OIDCAuthenticator_missing_code_parameter(oidc_well_known_url: str): + authenticator = OIDCAuthenticator( + audience="APP-TEST-CLIENT-ID", + client_id="APP-TEST-CLIENT-ID", + client_secret="test-secret", + well_known_uri=oidc_well_known_url, + ) + + mock_request = create_mock_oidc_request({}) + + result = await authenticator.authenticate(mock_request) + assert result is None + + +@pytest.mark.asyncio +async def test_OIDCAuthenticator_token_exchange_failure( + oidc_well_known_url: str, + mock_oidc_server, + well_known_response, +): + mock_oidc_server.post(well_known_response["token_endpoint"]).mock( + return_value=httpx.Response( + 400, + json={ + "error": "invalid_client", + "error_description": "Client not found: APP-TEST-CLIENT-ID", + }, + ) + ) + + authenticator = OIDCAuthenticator( + audience="APP-TEST-CLIENT-ID", + client_id="APP-TEST-CLIENT-ID", + client_secret="test-secret", + well_known_uri=oidc_well_known_url, + ) + + mock_request = create_mock_oidc_request({"code": "invalid-code"}) + + result = await authenticator.authenticate(mock_request) + assert result is None diff --git a/bluesky_httpserver/tests/test_oidc_authenticators.py b/bluesky_httpserver/tests/test_oidc_authenticators.py new file mode 100644 index 0000000..f3249cd --- /dev/null +++ b/bluesky_httpserver/tests/test_oidc_authenticators.py @@ -0,0 +1,219 @@ +"""Tests for OIDC Authenticator functionality.""" + +import time +from typing import Any, Tuple + +import httpx +import pytest +from cryptography.hazmat.primitives.asymmetric import rsa +from jose import ExpiredSignatureError, jwt +from jose.backends import RSAKey +from respx import MockRouter + +from bluesky_httpserver.authenticators import OIDCAuthenticator, ProxiedOIDCAuthenticator + + +@pytest.fixture +def oidc_well_known_url(oidc_base_url: str) -> str: + return f"{oidc_base_url}.well-known/openid-configuration" + + +@pytest.fixture +def keys() -> Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]: + """Generate RSA key pair for testing.""" + private_key = rsa.generate_private_key(public_exponent=65537, key_size=2048) + public_key = private_key.public_key() + return (private_key, public_key) + + +@pytest.fixture +def json_web_keyset(keys: Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey]) -> list[dict[str, Any]]: + """Create a JSON Web Key Set from the test keys.""" + _, public_key = keys + return [RSAKey(key=public_key, algorithm="RS256").to_dict()] + + +@pytest.fixture +def mock_oidc_server( + respx_mock: MockRouter, + oidc_well_known_url: str, + well_known_response: dict[str, Any], + json_web_keyset: list[dict[str, Any]], +) -> MockRouter: + """Set up mock OIDC server endpoints.""" + respx_mock.get(oidc_well_known_url).mock(return_value=httpx.Response(httpx.codes.OK, json=well_known_response)) + respx_mock.get(well_known_response["jwks_uri"]).mock( + return_value=httpx.Response(httpx.codes.OK, json={"keys": json_web_keyset}) + ) + return respx_mock + + +def create_token(issued: bool, expired: bool) -> dict[str, Any]: + """Create a test JWT token.""" + now = time.time() + return { + "aud": "test_client", + "exp": (now - 1500) if expired else (now + 1500), + "iat": (now - 1500) if issued else (now + 1500), + "iss": "https://example.com/realms/example", + "sub": "test_user", + } + + +def encrypt_token(token: dict[str, Any], private_key: rsa.RSAPrivateKey) -> str: + """Encrypt a token with the test private key.""" + return jwt.encode( + token, + key=private_key, + algorithm="RS256", + headers={"kid": "test_key"}, + ) + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class TestOIDCAuthenticator: + """Tests for OIDCAuthenticator class.""" + + def test_oidc_authenticator_caching( + self, + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + well_known_response: dict[str, Any], + json_web_keyset: list[dict[str, Any]], + ): + """Test that OIDC configuration is cached after first fetch.""" + authenticator = OIDCAuthenticator( + audience="test_client", + client_id="test_client", + client_secret="secret", + well_known_uri=oidc_well_known_url, + ) + + # Access multiple properties to ensure caching works + assert authenticator.client_id == "test_client" + assert authenticator.authorization_endpoint == well_known_response["authorization_endpoint"] + assert ( + authenticator.id_token_signing_alg_values_supported + == well_known_response["id_token_signing_alg_values_supported"] + ) + assert authenticator.issuer == well_known_response["issuer"] + assert authenticator.jwks_uri == well_known_response["jwks_uri"] + assert authenticator.token_endpoint == well_known_response["token_endpoint"] + assert authenticator.device_authorization_endpoint == well_known_response["device_authorization_endpoint"] + assert authenticator.end_session_endpoint == well_known_response["end_session_endpoint"] + + # Should only call well-known endpoint once due to caching + assert len(mock_oidc_server.calls) == 1 + call_request = mock_oidc_server.calls[0].request + assert call_request.method == "GET" + assert call_request.url == oidc_well_known_url + + # Keys should also be cached + assert authenticator.keys() == json_web_keyset + assert len(mock_oidc_server.calls) == 2 # Now also fetched JWKS + + # Multiple calls should still be cached + for _ in range(5): + assert authenticator.keys() == json_web_keyset + assert len(mock_oidc_server.calls) == 2 # No new calls + + @pytest.mark.parametrize("issued", [True, False]) + @pytest.mark.parametrize("expired", [True, False]) + def test_oidc_token_decoding( + self, + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + issued: bool, + expired: bool, + keys: Tuple[rsa.RSAPrivateKey, rsa.RSAPublicKey], + ): + """Test token decoding with various validity scenarios.""" + private_key, _ = keys + authenticator = OIDCAuthenticator( + audience="test_client", + client_id="test_client", + client_secret="secret", + well_known_uri=oidc_well_known_url, + ) + + token = create_token(issued, expired) + encrypted = encrypt_token(token, private_key) + + if not expired: + # Non-expired tokens should decode successfully + decoded = authenticator.decode_token(encrypted) + assert decoded["sub"] == "test_user" + assert decoded["aud"] == "test_client" + else: + # Expired tokens should raise an error + with pytest.raises(ExpiredSignatureError): + authenticator.decode_token(encrypted) + + def test_oidc_authenticator_properties( + self, + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + well_known_response: dict[str, Any], + ): + """Test that all authenticator properties are correctly set.""" + authenticator = OIDCAuthenticator( + audience="my_audience", + client_id="my_client_id", + client_secret="my_secret", + well_known_uri=oidc_well_known_url, + confirmation_message="Logged in as {id}", + redirect_on_success="https://app.example.com/success", + redirect_on_failure="https://app.example.com/failure", + ) + + assert authenticator.client_id == "my_client_id" + assert authenticator.confirmation_message == "Logged in as {id}" + assert authenticator.redirect_on_success == "https://app.example.com/success" + assert authenticator.redirect_on_failure == "https://app.example.com/failure" + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +class TestProxiedOIDCAuthenticator: + """Tests for ProxiedOIDCAuthenticator class.""" + + @pytest.mark.asyncio + async def test_proxied_oidc_oauth2_schema( + self, + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + ): + """Test that ProxiedOIDCAuthenticator extracts bearer token correctly.""" + authenticator = ProxiedOIDCAuthenticator( + audience="test_client", + client_id="test_client", + well_known_uri=oidc_well_known_url, + device_flow_client_id="test_cli_client", + ) + + # Create a mock request with Authorization header + test_request = httpx.Request( + "GET", + "http://example.com/api/test", + headers={"Authorization": "Bearer TEST_TOKEN"}, + ) + + # The oauth2_schema should extract the bearer token + token = await authenticator.oauth2_schema(test_request) + assert token == "TEST_TOKEN" + + def test_proxied_oidc_with_scopes( + self, + mock_oidc_server: MockRouter, + oidc_well_known_url: str, + ): + """Test ProxiedOIDCAuthenticator with custom scopes.""" + authenticator = ProxiedOIDCAuthenticator( + audience="test_client", + client_id="test_client", + well_known_uri=oidc_well_known_url, + device_flow_client_id="test_cli_client", + scopes=["openid", "profile", "email"], + ) + + assert authenticator.scopes == ["openid", "profile", "email"] + assert authenticator.device_flow_client_id == "test_cli_client" diff --git a/.github/workflows/docker-configs/ldap-docker-compose.yml b/continuous_integration/docker-configs/ldap-docker-compose.yml similarity index 74% rename from .github/workflows/docker-configs/ldap-docker-compose.yml rename to continuous_integration/docker-configs/ldap-docker-compose.yml index 5cf12a8..2b2c45a 100644 --- a/.github/workflows/docker-configs/ldap-docker-compose.yml +++ b/continuous_integration/docker-configs/ldap-docker-compose.yml @@ -1,8 +1,6 @@ -version: '2' - services: openldap: - image: docker.io/bitnami/openldap:latest + image: osixia/openldap:latest ports: - '1389:1389' - '1636:1636' @@ -12,7 +10,7 @@ services: - LDAP_USERS=user01,user02 - LDAP_PASSWORDS=password1,password2 volumes: - - 'openldap_data:/bitnami/openldap' + - 'openldap_data:/var/lib/ldap' volumes: openldap_data: diff --git a/continuous_integration/scripts/start_LDAP.sh b/continuous_integration/scripts/start_LDAP.sh new file mode 100755 index 0000000..ecfa1cf --- /dev/null +++ b/continuous_integration/scripts/start_LDAP.sh @@ -0,0 +1,7 @@ +#!/bin/bash +set -e + +# Start LDAP server in docker container +docker pull osixia/openldap:latest +docker compose -f continuous_integration/docker-configs/ldap-docker-compose.yml up -d +docker ps diff --git a/docker/test.Dockerfile b/docker/test.Dockerfile new file mode 100644 index 0000000..2e994cf --- /dev/null +++ b/docker/test.Dockerfile @@ -0,0 +1,29 @@ +ARG PYTHON_VERSION=3.13 +FROM python:${PYTHON_VERSION}-slim + +ENV PYTHONUNBUFFERED=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 + +RUN apt-get update && apt-get install -y --no-install-recommends \ + bash \ + build-essential \ + git \ + redis-server \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /workspace + +COPY requirements.txt requirements-dev.txt ./ +COPY pyproject.toml setup.py setup.cfg MANIFEST.in versioneer.py README.rst AUTHORS.rst LICENSE ./ +COPY bluesky_httpserver ./bluesky_httpserver + +RUN python -m pip install --upgrade pip setuptools wheel numpy && \ + python -m pip install git+https://github.com/bluesky/bluesky-queueserver.git && \ + python -m pip install git+https://github.com/bluesky/bluesky-queueserver-api.git && \ + python -m pip install -r requirements-dev.txt && \ + python -m pip install . + +COPY scripts/docker/run_shard_in_container.sh /usr/local/bin/run_shard_in_container.sh +RUN chmod +x /usr/local/bin/run_shard_in_container.sh + +ENTRYPOINT ["/usr/local/bin/run_shard_in_container.sh"] diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index eb31efa..8852a31 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -294,6 +294,85 @@ See the documentation on ``LDAPAuthenticator`` for more details. authenticators.LDAPAuthenticator +OIDC Authenticator +++++++++++++++++++ + +``OIDCAuthenticator`` integrates the server with third-party OpenID Connect providers +such as Google, Microsoft Entra ID, ORCID and others. The server does not process user +passwords directly: authentication is delegated to the provider and the server validates +the returned OIDC token. + +General setup steps: + +#. Register an application with the OIDC provider. +#. Configure redirect URIs for the provider application. For provider name ``entra`` and + host ``https://your-server.example`` the redirect URIs are: + + - ``https://your-server.example/api/auth/provider/entra/code`` + - ``https://your-server.example/api/auth/provider/entra/device_code`` + +#. Store the client secret in environment variable and reference it in config. +#. Use provider's ``.well-known/openid-configuration`` URL. + +Typical ``well_known_uri`` values: + +- Google: ``https://accounts.google.com/.well-known/openid-configuration`` +- Microsoft Entra ID: ``https://login.microsoftonline.com//v2.0/.well-known/openid-configuration`` +- ORCID: ``https://orcid.org/.well-known/openid-configuration`` + +Example configuration (Microsoft Entra ID):: + + authentication: + providers: + - provider: entra + authenticator: bluesky_httpserver.authenticators:OIDCAuthenticator + args: + audience: 00000000-0000-0000-0000-000000000000 + client_id: 00000000-0000-0000-0000-000000000000 + client_secret: ${BSKY_ENTRA_SECRET} + well_known_uri: https://login.microsoftonline.com//v2.0/.well-known/openid-configuration + confirmation_message: "You have logged in successfully." + api_access: + policy: bluesky_httpserver.authorization:DictionaryAPIAccessControl + args: + users: + : + roles: + - admin + - expert + +Example configuration (Google):: + + authentication: + providers: + - provider: google + authenticator: bluesky_httpserver.authenticators:OIDCAuthenticator + args: + audience: + client_id: + client_secret: ${BSKY_GOOGLE_SECRET} + well_known_uri: https://accounts.google.com/.well-known/openid-configuration + api_access: + policy: bluesky_httpserver.authorization:DictionaryAPIAccessControl + args: + users: + : + roles: user + +.. note:: + + The name used in ``api_access/args/users`` must match the identity string produced by + the authenticator for your provider configuration. Verify with ``/api/auth/whoami`` after + successful login. + +See the documentation on ``OIDCAuthenticator`` for parameter details. + +.. autosummary:: + :nosignatures: + :toctree: generated + + authenticators.OIDCAuthenticator + Expiration Time for Tokens and Sessions +++++++++++++++++++++++++++++++++++++++ diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 5e1e9b3..299bdcb 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -154,6 +154,61 @@ Then users ``bob``, ``alice`` and ``tom`` can log into the server as :: If authentication is successful, then the server returns access and refresh tokens. +Logging in with OIDC Providers (Google, Entra, ORCID, ...) +----------------------------------------------------------- + +For providers configured with ``OIDCAuthenticator``, use provider-specific endpoints +under ``/api/auth/provider//...``. + +Browser-first flow +****************** + +If you are already in a browser context, open: + +``/api/auth/provider//authorize`` + +This redirects to the OIDC provider login page and then back to the server callback. + +This can similarly be acheived using ``httpie`` by opening the URL in a browser after getting +the authorization URI from the server:: + + http POST http://localhost:60610/api/auth/provider/entra/authorize + +Which will return a token back to the bluesky http server after the user logs in to the provider +in their browser (or automatically if already logged in). The user then gets a token +for the bluesky HTTP server to use for subsequent API requests. This flow can be used +even when using the bluesky queueserver api in a terminal so long as that session can +spawn a browser for the user to log in to the provider. + +CLI/device flow +*************** + +For terminal clients (i.e. no browser possible), start with +``POST /api/auth/provider//authorize``. +The response includes: + +- ``authorization_uri``: open this URL in a browser +- ``verification_uri``: polling endpoint for the terminal client +- ``device_code`` and ``interval``: values for polling + +Example using ``httpie`` (provider ``entra``):: + + http POST http://localhost:60610/api/auth/provider/entra/authorize + +After opening ``authorization_uri`` in a browser and completing provider login, +poll ``verification_uri`` using ``device_code`` until tokens are issued:: + + http POST http://localhost:60610/api/auth/provider/entra/token \ + device_code='' + +When authorization is still pending, the endpoint returns ``authorization_pending``. +When complete, it returns access and refresh tokens. + +.. note:: + + In common same-device flows the callback can complete automatically without manually + typing the user code. Manual code entry remains available as a fallback path. + Generating API Keys ------------------- diff --git a/requirements-dev.txt b/requirements-dev.txt index dd7212a..e47dd72 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,13 +3,16 @@ black codecov coverage +cryptography fastapi[all] flake8 isort pre-commit pytest +pytest-asyncio pytest-xprocess py +respx sphinx ipython numpydoc diff --git a/requirements.txt b/requirements.txt index f465abd..1377ef0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,9 @@ alembic bluesky-queueserver bluesky-queueserver-api +cachetools fastapi +httpx ldap3 orjson pamela diff --git a/scripts/docker/run_shard_in_container.sh b/scripts/docker/run_shard_in_container.sh new file mode 100755 index 0000000..7fc23a7 --- /dev/null +++ b/scripts/docker/run_shard_in_container.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +set -euo pipefail + +SHARD_GROUP="${SHARD_GROUP:-1}" +SHARD_COUNT="${SHARD_COUNT:-3}" +ARTIFACTS_DIR="${ARTIFACTS_DIR:-/artifacts}" +PYTEST_EXTRA_ARGS="${PYTEST_EXTRA_ARGS:-}" + +mkdir -p "$ARTIFACTS_DIR" + +if [[ "$SHARD_GROUP" -lt 1 || "$SHARD_COUNT" -lt 1 || "$SHARD_GROUP" -gt "$SHARD_COUNT" ]]; then + echo "Invalid shard settings: SHARD_GROUP=$SHARD_GROUP SHARD_COUNT=$SHARD_COUNT" >&2 + exit 2 +fi + +export COVERAGE_FILE="$ARTIFACTS_DIR/.coverage.${SHARD_GROUP}" + +redis-server --save "" --appendonly no --daemonize yes +for _ in $(seq 1 50); do + if redis-cli ping >/dev/null 2>&1; then + break + fi + sleep 0.2 +done + +if ! redis-cli ping >/dev/null 2>&1; then + echo "Failed to start redis-server inside container" >&2 + exit 2 +fi + +mapfile -t shard_tests < <( + python - <<'PY' "$SHARD_GROUP" "$SHARD_COUNT" +import glob +import sys + +group = int(sys.argv[1]) +count = int(sys.argv[2]) + +tests = sorted(glob.glob("bluesky_httpserver/tests/test_*.py")) +selected = [path for idx, path in enumerate(tests) if idx % count == (group - 1)] + +for path in selected: + print(path) +PY +) + +if [[ "${#shard_tests[@]}" -eq 0 ]]; then + echo "No tests selected for shard ${SHARD_GROUP}/${SHARD_COUNT}; treating as success." + exit 0 +fi + +pytest_cmd=( + coverage + run + -m + pytest + --junitxml="$ARTIFACTS_DIR/junit.${SHARD_GROUP}.xml" + -vv +) + +if [[ -n "$PYTEST_EXTRA_ARGS" ]]; then + read -r -a extra_args <<< "$PYTEST_EXTRA_ARGS" + pytest_cmd+=("${extra_args[@]}") +fi + +pytest_cmd+=("${shard_tests[@]}") + +set +e +"${pytest_cmd[@]}" +test_status=$? +set -e + +if [[ "$test_status" -eq 5 ]]; then + echo "Pytest collected no tests for shard ${SHARD_GROUP}/${SHARD_COUNT}; treating as success." + test_status=0 +fi + +redis-cli shutdown nosave >/dev/null 2>&1 || true + +exit "$test_status" diff --git a/scripts/run_ci_docker_parallel.sh b/scripts/run_ci_docker_parallel.sh new file mode 100755 index 0000000..c9caee7 --- /dev/null +++ b/scripts/run_ci_docker_parallel.sh @@ -0,0 +1,480 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +IMAGE_TAG_BASE="bluesky-httpserver-test:local" +WORKER_COUNT="3" +CHUNK_COUNT="" +PYTHON_VERSIONS="latest" +PYTEST_EXTRA_ARGS="" +ARTIFACTS_DIR="$ROOT_DIR/.docker-test-artifacts" +DOCKER_NETWORK_NAME="bhs-ci-net" +LDAP_CONTAINER_NAME="bhs-ci-ldap" + +SUMMARY_TSV="" +SUMMARY_FAIL_LOGS="" +SUMMARY_TXT="" +SUMMARY_JSON="" +TESTS_START_EPOCH="" +TESTS_START_HUMAN="" + +SUPPORTED_PYTHON_VERSIONS=("3.10" "3.11" "3.12" "3.13") + +usage() { + cat <<'EOF' +Run bluesky-httpserver unit tests in Docker with dynamic chunk dispatch and optional Python-version matrix. + +Usage: + scripts/run_ci_docker_parallel.sh [options] + +Options: + --workers N, --worker-count N + Number of concurrent chunk workers (default: 3). + + --chunks N, --chunk-count N + Number of total chunks/splits to execute per Python version. + Default: workers * 3. + + --python-versions VALUE + Python version selection: latest | all | comma-separated list. + Examples: latest, all, 3.12, 3.11,3.13 + Default: latest (currently 3.13). + + --pytest-args "ARGS" + Extra arguments passed to pytest in each chunk. + Example: --pytest-args "-k oidc --maxfail=1" + + --artifacts-dir PATH + Output directory for all artifacts. + Default: .docker-test-artifacts under repository root. + + --image-tag TAG + Base docker image tag. Per-version tags will append -py. + Default: bluesky-httpserver-test:local + + -h, --help + Show this help message. + +Examples: + scripts/run_ci_docker_parallel.sh + scripts/run_ci_docker_parallel.sh --workers 8 --chunks 24 + scripts/run_ci_docker_parallel.sh --python-versions all --workers 8 --chunks 24 + scripts/run_ci_docker_parallel.sh --python-versions 3.11,3.13 --pytest-args "-k test_access_control" +EOF +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --workers|--worker-count) + WORKER_COUNT="$2" + shift 2 + ;; + --chunks|--chunk-count) + CHUNK_COUNT="$2" + shift 2 + ;; + --python-versions) + PYTHON_VERSIONS="$2" + shift 2 + ;; + --pytest-args) + PYTEST_EXTRA_ARGS="$2" + shift 2 + ;; + --artifacts-dir) + ARTIFACTS_DIR="$2" + shift 2 + ;; + --image-tag) + IMAGE_TAG_BASE="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown option: $1" >&2 + usage + exit 2 + ;; + esac +done + +if [[ "$WORKER_COUNT" -lt 1 ]]; then + echo "WORKER_COUNT must be >= 1" >&2 + exit 2 +fi + +if [[ -z "$CHUNK_COUNT" ]]; then + CHUNK_COUNT=$(( WORKER_COUNT * 3 )) +fi + +if [[ "$CHUNK_COUNT" -lt 1 ]]; then + echo "CHUNK_COUNT must be >= 1" >&2 + exit 2 +fi + +if ! command -v docker >/dev/null 2>&1; then + echo "docker is required but not found in PATH" >&2 + exit 2 +fi + +if ! docker info >/dev/null 2>&1; then + echo "docker daemon is not available" >&2 + exit 2 +fi + +normalize_python_versions() { + local selection="$1" + local raw + local normalized=() + + if [[ "$selection" == "latest" ]]; then + normalized=("3.13") + elif [[ "$selection" == "all" ]]; then + normalized=("${SUPPORTED_PYTHON_VERSIONS[@]}") + else + raw="${selection//,/ }" + read -r -a normalized <<< "$raw" + fi + + if [[ "${#normalized[@]}" -eq 0 ]]; then + echo "PYTHON_VERSIONS selection produced no versions" >&2 + exit 2 + fi + + for version in "${normalized[@]}"; do + if [[ ! " ${SUPPORTED_PYTHON_VERSIONS[*]} " =~ " ${version} " ]]; then + echo "Unsupported Python version '${version}'. Supported: ${SUPPORTED_PYTHON_VERSIONS[*]}" >&2 + exit 2 + fi + done + + echo "${normalized[@]}" +} + +ensure_ldap_image() { + local image_ref="bitnami/openldap:latest" + if docker image inspect "$image_ref" >/dev/null 2>&1; then + return + fi + + echo "LDAP image $image_ref not found locally; trying docker pull..." + if docker pull "$image_ref"; then + return + fi + + echo "docker pull failed; building bitnami/openldap:latest from source (CI fallback)." + local workdir="$ROOT_DIR/.docker-test-artifacts/bitnami-containers" + rm -rf "$workdir" + git clone --depth 1 https://github.com/bitnami/containers.git "$workdir" + (cd "$workdir/bitnami/openldap/2.6/debian-12" && docker build -t "$image_ref" .) +} + +start_services() { + ensure_ldap_image + + docker network rm "$DOCKER_NETWORK_NAME" >/dev/null 2>&1 || true + docker network create "$DOCKER_NETWORK_NAME" >/dev/null + + docker rm -f "$LDAP_CONTAINER_NAME" >/dev/null 2>&1 || true + docker run -d --rm \ + --name "$LDAP_CONTAINER_NAME" \ + --network "$DOCKER_NETWORK_NAME" \ + -e LDAP_ADMIN_USERNAME=admin \ + -e LDAP_ADMIN_PASSWORD=adminpassword \ + -e LDAP_USERS=user01,user02 \ + -e LDAP_PASSWORDS=password1,password2 \ + bitnami/openldap:latest >/dev/null + + sleep 2 +} + +stop_services() { + docker rm -f "$LDAP_CONTAINER_NAME" >/dev/null 2>&1 || true + docker network rm "$DOCKER_NETWORK_NAME" >/dev/null 2>&1 || true +} + +cleanup() { + stop_services +} + +collect_junit_totals() { + local artifacts_dir="$1" + + python - "$artifacts_dir" <<'PY' +import glob +import os +import sys +import xml.etree.ElementTree as ET + +artifacts_dir = sys.argv[1] +tests = failures = errors = files = 0 + +for path in sorted(glob.glob(os.path.join(artifacts_dir, "junit.*.xml"))): + files += 1 + try: + root = ET.parse(path).getroot() + except Exception: + continue + + if root.tag == "testsuite": + suites = [root] + elif root.tag == "testsuites": + suites = root.findall("testsuite") + else: + suites = [] + + for suite in suites: + tests += int(suite.attrib.get("tests", 0) or 0) + failures += int(suite.attrib.get("failures", 0) or 0) + errors += int(suite.attrib.get("errors", 0) or 0) + +print(f"{tests} {failures} {errors} {files}") +PY +} + +append_summary_row() { + local py_version="$1" + local chunks_total="$2" + local junit_files="$3" + local tests="$4" + local failures="$5" + local errors="$6" + local status="$7" + + printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \ + "$py_version" "$chunks_total" "$junit_files" "$tests" "$failures" "$errors" "$status" >> "$SUMMARY_TSV" +} + +write_summary_files() { + local end_epoch end_human elapsed_sec + + if [[ -z "$SUMMARY_TSV" || -z "$SUMMARY_TXT" || -z "$SUMMARY_JSON" ]]; then + return + fi + + if [[ ! -f "$SUMMARY_TSV" ]]; then + return + fi + + end_epoch="$(date +%s)" + end_human="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" + + if [[ -n "$TESTS_START_EPOCH" ]]; then + elapsed_sec=$(( end_epoch - TESTS_START_EPOCH )) + else + elapsed_sec=0 + fi + + { + echo "Test Run Summary" + echo "Start (UTC): ${TESTS_START_HUMAN:-N/A}" + echo "End (UTC): $end_human" + echo "Elapsed: ${elapsed_sec}s" + echo + printf "%-8s %-8s %-7s %-8s %-10s %-8s %-6s\n" \ + "Python" "Status" "Chunks" "JUnit" "Tests" "Failures" "Errors" + printf "%-8s %-8s %-7s %-8s %-10s %-8s %-6s\n" \ + "------" "------" "------" "-----" "-----" "--------" "------" + + if [[ -s "$SUMMARY_TSV" ]]; then + while IFS=$'\t' read -r py_version chunks_total junit_files tests failures errors status; do + printf "%-8s %-8s %-7s %-8s %-10s %-8s %-6s\n" \ + "$py_version" "$status" "$chunks_total" "$junit_files" "$tests" "$failures" "$errors" + done < "$SUMMARY_TSV" + else + echo "No per-version summary rows were recorded." + fi + + if [[ -s "$SUMMARY_FAIL_LOGS" ]]; then + echo + echo "Failed Chunk Logs" + cat "$SUMMARY_FAIL_LOGS" + fi + } > "$SUMMARY_TXT" + + python - "$SUMMARY_TSV" "$SUMMARY_FAIL_LOGS" "$SUMMARY_JSON" "${TESTS_START_HUMAN:-N/A}" "$end_human" "$elapsed_sec" <<'PY' +import json +import sys + +summary_tsv, fail_logs_path, output_path, start_utc, end_utc, elapsed_sec = sys.argv[1:] + +rows = [] +with open(summary_tsv) as f: + for line in f: + parts = line.rstrip("\n").split("\t") + if len(parts) != 7: + continue + py_version, chunks_total, junit_files, tests, failures, errors, status = parts + rows.append( + { + "python_version": py_version, + "status": status, + "chunks_total": int(chunks_total), + "junit_files": int(junit_files), + "tests": int(tests), + "failures": int(failures), + "errors": int(errors), + } + ) + +failed_logs = [] +with open(fail_logs_path) as f: + failed_logs = [line.strip() for line in f if line.strip()] + +payload = { + "start_utc": start_utc, + "end_utc": end_utc, + "elapsed_seconds": int(elapsed_sec), + "python_versions": rows, + "failed_chunk_logs": failed_logs, +} + +with open(output_path, "w") as f: + json.dump(payload, f, indent=2) + f.write("\n") +PY + + echo "==> Test run end time (UTC): $end_human" + echo "==> Test run elapsed: ${elapsed_sec}s" + echo "==> Summary written: $SUMMARY_TXT" + echo "==> Summary JSON: $SUMMARY_JSON" +} + +on_exit() { + local exit_code=$? + write_summary_files || true + cleanup + trap - EXIT + exit "$exit_code" +} + +trap on_exit EXIT + +read -r -a SELECTED_PYTHON_VERSIONS <<< "$(normalize_python_versions "$PYTHON_VERSIONS")" + +echo "==> Preparing artifacts directory: $ARTIFACTS_DIR" +rm -rf "$ARTIFACTS_DIR" +mkdir -p "$ARTIFACTS_DIR" + +SUMMARY_TSV="$ARTIFACTS_DIR/.summary_rows.tsv" +SUMMARY_FAIL_LOGS="$ARTIFACTS_DIR/.summary_fail_logs.txt" +SUMMARY_TXT="$ARTIFACTS_DIR/summary.txt" +SUMMARY_JSON="$ARTIFACTS_DIR/summary.json" + +: > "$SUMMARY_TSV" +: > "$SUMMARY_FAIL_LOGS" + +echo "==> Starting shared services (LDAP)" +start_services + +TESTS_START_EPOCH="$(date +%s)" +TESTS_START_HUMAN="$(date -u +"%Y-%m-%dT%H:%M:%SZ")" +echo "==> Test run start time (UTC): $TESTS_START_HUMAN" +echo "==> Python versions selected: ${SELECTED_PYTHON_VERSIONS[*]}" + +run_chunk() { + local group="$1" + local log_file="$CURRENT_ARTIFACTS_DIR/shard.${group}.log" + + if docker run --rm \ + --network "$DOCKER_NETWORK_NAME" \ + -e SHARD_GROUP="$group" \ + -e SHARD_COUNT="$CHUNK_COUNT" \ + -e ARTIFACTS_DIR="/artifacts" \ + -e PYTEST_EXTRA_ARGS="$PYTEST_EXTRA_ARGS" \ + -e QSERVER_TEST_LDAP_HOST="$LDAP_CONTAINER_NAME" \ + -e QSERVER_TEST_LDAP_PORT="1389" \ + -e QSERVER_TEST_REDIS_ADDR="localhost" \ + -e QSERVER_HTTP_TEST_BIND_HOST="127.0.0.1" \ + -e QSERVER_HTTP_TEST_HOST="127.0.0.1" \ + -v "$CURRENT_ARTIFACTS_DIR:/artifacts" \ + "$CURRENT_IMAGE_TAG" >"$log_file" 2>&1; then + : > "$CURRENT_ARTIFACTS_DIR/.status.${group}.ok" + else + : > "$CURRENT_ARTIFACTS_DIR/.status.${group}.fail" + exit 1 + fi +} + +export -f run_chunk +export CHUNK_COUNT PYTEST_EXTRA_ARGS DOCKER_NETWORK_NAME LDAP_CONTAINER_NAME + +for PYTHON_VERSION in "${SELECTED_PYTHON_VERSIONS[@]}"; do + CURRENT_IMAGE_TAG="${IMAGE_TAG_BASE}-py${PYTHON_VERSION}" + CURRENT_ARTIFACTS_DIR="$ARTIFACTS_DIR/py${PYTHON_VERSION}" + export CURRENT_IMAGE_TAG CURRENT_ARTIFACTS_DIR + + echo "==> Building test image: $CURRENT_IMAGE_TAG (Python $PYTHON_VERSION)" + docker build \ + --build-arg PYTHON_VERSION="$PYTHON_VERSION" \ + -f "$ROOT_DIR/docker/test.Dockerfile" \ + -t "$CURRENT_IMAGE_TAG" \ + "$ROOT_DIR" + + mkdir -p "$CURRENT_ARTIFACTS_DIR" + + echo "==> [Python $PYTHON_VERSION] Starting dynamic dispatch: $WORKER_COUNT workers over $CHUNK_COUNT chunks" + if ! seq 1 "$CHUNK_COUNT" | xargs -P "$WORKER_COUNT" -I {} bash -lc 'run_chunk "$1"' _ {}; then + echo "One or more chunks failed for Python $PYTHON_VERSION." >&2 + read -r TOTAL_TESTS TOTAL_FAILURES TOTAL_ERRORS TOTAL_JUNIT_FILES < <(collect_junit_totals "$CURRENT_ARTIFACTS_DIR") + for group in $(seq 1 "$CHUNK_COUNT"); do + if [[ -f "$CURRENT_ARTIFACTS_DIR/.status.${group}.fail" ]]; then + echo "Chunk $group failed. Log: $CURRENT_ARTIFACTS_DIR/shard.${group}.log" >&2 + echo "$CURRENT_ARTIFACTS_DIR/shard.${group}.log" >> "$SUMMARY_FAIL_LOGS" + fi + done + append_summary_row "py${PYTHON_VERSION}" "$CHUNK_COUNT" "$TOTAL_JUNIT_FILES" \ + "$TOTAL_TESTS" "$TOTAL_FAILURES" "$TOTAL_ERRORS" "FAIL" + exit 1 + fi + + for group in $(seq 1 "$CHUNK_COUNT"); do + if [[ -f "$CURRENT_ARTIFACTS_DIR/.status.${group}.ok" ]]; then + echo "[Python $PYTHON_VERSION] Chunk $group completed successfully" + fi + done + + rm -f "$CURRENT_ARTIFACTS_DIR"/.status.*.ok "$CURRENT_ARTIFACTS_DIR"/.status.*.fail + + echo "==> [Python $PYTHON_VERSION] Merging coverage artifacts" + docker run --rm \ + --entrypoint bash \ + -v "$CURRENT_ARTIFACTS_DIR:/artifacts" \ + "$CURRENT_IMAGE_TAG" \ + -lc "set -euo pipefail; \ + python -m coverage combine /artifacts/.coverage.* && \ + python -m coverage xml -o /artifacts/coverage.xml && \ + python -m coverage report -m > /artifacts/coverage.txt" + + if [[ "${#SELECTED_PYTHON_VERSIONS[@]}" -eq 1 ]]; then + cp "$CURRENT_ARTIFACTS_DIR/coverage.xml" "$ROOT_DIR/coverage.xml" + else + cp "$CURRENT_ARTIFACTS_DIR/coverage.xml" "$ROOT_DIR/coverage.py${PYTHON_VERSION}.xml" + fi + + read -r TOTAL_TESTS TOTAL_FAILURES TOTAL_ERRORS TOTAL_JUNIT_FILES < <(collect_junit_totals "$CURRENT_ARTIFACTS_DIR") + echo "==> [Python $PYTHON_VERSION] JUnit summary: tests=$TOTAL_TESTS failures=$TOTAL_FAILURES errors=$TOTAL_ERRORS files=$TOTAL_JUNIT_FILES" + + VERSION_STATUS="PASS" + if [[ "$TOTAL_FAILURES" -gt 0 || "$TOTAL_ERRORS" -gt 0 ]]; then + VERSION_STATUS="FAIL" + fi + + append_summary_row "py${PYTHON_VERSION}" "$CHUNK_COUNT" "$TOTAL_JUNIT_FILES" \ + "$TOTAL_TESTS" "$TOTAL_FAILURES" "$TOTAL_ERRORS" "$VERSION_STATUS" +done + +echo "==> Completed. Artifacts:" +echo " versioned logs : $ARTIFACTS_DIR/py/shard..log" +echo " versioned junit : $ARTIFACTS_DIR/py/junit..xml" +echo " versioned coverage : $ARTIFACTS_DIR/py/{coverage.txt,coverage.xml}" +echo " run summary : $ARTIFACTS_DIR/{summary.txt,summary.json}" + +if [[ "${#SELECTED_PYTHON_VERSIONS[@]}" -eq 1 ]]; then + echo " root coverage xml : $ROOT_DIR/coverage.xml" +else + echo " root coverage xmls : $ROOT_DIR/coverage.py.xml" +fi diff --git a/start_LDAP.sh b/start_LDAP.sh deleted file mode 100644 index 8b612de..0000000 --- a/start_LDAP.sh +++ /dev/null @@ -1,8 +0,0 @@ - -#!/bin/bash -set -e - -# Start LDAP server in docker container -# sudo docker pull osixia/openldap:latest -sudo docker compose -f .github/workflows/docker-configs/ldap-docker-compose.yml up -d -sudo docker ps