From e96a7a3a502e8451201731eff42778228d390b27 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 00:43:47 +0900 Subject: [PATCH 01/17] feat(BA-4904): add GraphQL ResourceSlotType node with root queries and connections - Add ResourceSlotTypeGQL(Node) exposing all resource_slot_types columns (slot_name, slot_type, display_name, description, display_unit, display_icon, number_format, rank) with ResourceSlotTypeConnectionGQL - Add AgentResourceSlotGQL(Node) for per-slot capacity/usage on agents with AgentResourceConnectionGQL; wire as resource_slots field on AgentV2GQL - Add KernelResourceAllocationGQL(Node) for per-slot allocation on kernels with ResourceAllocationConnectionGQL; wire as resource_allocations field on KernelV2GQL - Add root queries resource_slot_type(slot_name) and resource_slot_types() - Shared fetcher functions reused across root queries and connection resolvers - Add AllSlotTypesAction/GetSlotTypeAction to ResourceSlotService and processors - Add NumberFormatData to data layer; add RESOURCE_SLOT_TYPE to EntityType enum Co-Authored-By: Claude Sonnet 4.6 --- src/ai/backend/manager/api/gql/agent/types.py | 16 ++ .../backend/manager/api/gql/kernel/types.py | 16 ++ .../manager/api/gql/resource_slot/__init__.py | 1 + .../manager/api/gql/resource_slot/fetcher.py | 181 ++++++++++++ .../manager/api/gql/resource_slot/resolver.py | 28 ++ .../manager/api/gql/resource_slot/types.py | 262 ++++++++++++++++++ src/ai/backend/manager/api/gql/schema.py | 3 + .../resource_slot/actions/__init__.py | 3 + .../resource_slot/actions/all_slot_types.py | 37 +++ .../services/resource_slot/processors.py | 5 + .../manager/services/resource_slot/service.py | 21 ++ 11 files changed, 573 insertions(+) create mode 100644 src/ai/backend/manager/api/gql/resource_slot/__init__.py create mode 100644 src/ai/backend/manager/api/gql/resource_slot/fetcher.py create mode 100644 src/ai/backend/manager/api/gql/resource_slot/resolver.py create mode 100644 src/ai/backend/manager/api/gql/resource_slot/types.py create mode 100644 src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py diff --git a/src/ai/backend/manager/api/gql/agent/types.py b/src/ai/backend/manager/api/gql/agent/types.py index 4bb1566c721..70f8f8fed38 100644 --- a/src/ai/backend/manager/api/gql/agent/types.py +++ b/src/ai/backend/manager/api/gql/agent/types.py @@ -20,6 +20,7 @@ KernelV2FilterGQL, KernelV2OrderByGQL, ) + from ai.backend.manager.api.gql.resource_slot.types import AgentResourceConnectionGQL from ai.backend.manager.api.gql.session.types import ( SessionV2ConnectionGQL, SessionV2FilterGQL, @@ -489,6 +490,21 @@ async def sessions( base_conditions=[SessionConditions.by_agent_id(self._agent_id)], ) + @strawberry.field( # type: ignore[misc] + description="Added in 26.4.0. Per-slot resource capacity and usage for this agent." + ) + async def resource_slots( + self, + info: Info[StrawberryGQLContext], + ) -> Annotated[ + AgentResourceConnectionGQL, + strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), + ]: + """Fetch per-slot resource capacity and usage for this agent.""" + from ai.backend.manager.api.gql.resource_slot.fetcher import fetch_agent_resources + + return await fetch_agent_resources(info=info, agent_id=str(self._agent_id)) + @classmethod async def resolve_nodes( # type: ignore[override] # Strawberry Node uses AwaitableOrValue overloads incompatible with async def cls, diff --git a/src/ai/backend/manager/api/gql/kernel/types.py b/src/ai/backend/manager/api/gql/kernel/types.py index c19ffaec1bd..b39dbb3d7a9 100644 --- a/src/ai/backend/manager/api/gql/kernel/types.py +++ b/src/ai/backend/manager/api/gql/kernel/types.py @@ -17,6 +17,7 @@ from ai.backend.manager.api.gql.base import OrderDirection, UUIDFilter if TYPE_CHECKING: + from ai.backend.manager.api.gql.resource_slot.types import ResourceAllocationConnectionGQL from ai.backend.manager.api.gql.session.types import SessionV2GQL from ai.backend.manager.repositories.base import QueryCondition @@ -464,6 +465,21 @@ async def session( ): raise NotImplementedError + @strawberry.field( # type: ignore[misc] + description="Added in 26.4.0. Per-slot resource allocation for this kernel." + ) + async def resource_allocations( + self, + info: Info[StrawberryGQLContext], + ) -> Annotated[ + ResourceAllocationConnectionGQL, + strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), + ]: + """Fetch per-slot resource allocation for this kernel.""" + from ai.backend.manager.api.gql.resource_slot.fetcher import fetch_kernel_allocations + + return await fetch_kernel_allocations(info=info, kernel_id=str(self.id)) + @classmethod async def resolve_nodes( # type: ignore[override] # Strawberry Node uses AwaitableOrValue overloads incompatible with async def cls, diff --git a/src/ai/backend/manager/api/gql/resource_slot/__init__.py b/src/ai/backend/manager/api/gql/resource_slot/__init__.py new file mode 100644 index 00000000000..9d48db4f9f8 --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py new file mode 100644 index 00000000000..ea503f23c50 --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -0,0 +1,181 @@ +"""Fetcher functions for resource slot GQL queries. + +These functions are shared between root queries (resolver.py) and node connection +resolvers (AgentV2GQL.resource_slots, KernelV2GQL.resource_allocations) to avoid +duplicating query logic. +""" + +from __future__ import annotations + +import strawberry +from strawberry import Info + +from ai.backend.manager.api.gql.base import encode_cursor +from ai.backend.manager.api.gql.types import StrawberryGQLContext +from ai.backend.manager.errors.resource_slot import ResourceSlotTypeNotFound +from ai.backend.manager.services.resource_slot.actions.all_slot_types import AllSlotTypesAction +from ai.backend.manager.services.resource_slot.actions.get_agent_resources import ( + GetAgentResourcesAction, +) +from ai.backend.manager.services.resource_slot.actions.get_kernel_allocations import ( + GetKernelAllocationsAction, +) +from ai.backend.manager.services.resource_slot.actions.get_resource_slot_type import ( + GetResourceSlotTypeAction, +) + +from .types import ( + AgentResourceConnectionGQL, + AgentResourceSlotEdgeGQL, + AgentResourceSlotGQL, + KernelResourceAllocationEdgeGQL, + KernelResourceAllocationGQL, + ResourceAllocationConnectionGQL, + ResourceSlotTypeConnectionGQL, + ResourceSlotTypeEdgeGQL, + ResourceSlotTypeGQL, +) + + +async def fetch_resource_slot_types( + info: Info[StrawberryGQLContext], +) -> ResourceSlotTypeConnectionGQL: + """Fetch all registered resource slot types (shared between root query and node resolver).""" + action_result = await info.context.processors.resource_slot.all_slot_types.wait_for_complete( + AllSlotTypesAction() + ) + + edges = [] + for data in action_result.items: + node = ResourceSlotTypeGQL.from_data(data) + cursor = encode_cursor(data.slot_name) + edges.append(ResourceSlotTypeEdgeGQL(node=node, cursor=cursor)) + + page_info = strawberry.relay.PageInfo( + has_next_page=False, + has_previous_page=False, + start_cursor=edges[0].cursor if edges else None, + end_cursor=edges[-1].cursor if edges else None, + ) + + return ResourceSlotTypeConnectionGQL( + count=len(edges), + edges=edges, + page_info=page_info, + ) + + +async def fetch_resource_slot_type( + info: Info[StrawberryGQLContext], + slot_name: str, +) -> ResourceSlotTypeGQL | None: + """Fetch a single resource slot type by slot_name (used by Node resolution and root query).""" + try: + action_result = ( + await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( + GetResourceSlotTypeAction(slot_name=slot_name) + ) + ) + except ResourceSlotTypeNotFound: + return None + return ResourceSlotTypeGQL.from_data(action_result.item) + + +async def fetch_agent_resources( + info: Info[StrawberryGQLContext], + agent_id: str, +) -> AgentResourceConnectionGQL: + """Fetch all per-slot resource entries for a given agent (shared for AgentV2GQL connection).""" + action_result = ( + await info.context.processors.resource_slot.get_agent_resources.wait_for_complete( + GetAgentResourcesAction(agent_id=agent_id) + ) + ) + + edges = [] + for data in action_result.items: + node = AgentResourceSlotGQL.from_data(data) + cursor = encode_cursor(f"{data.agent_id}:{data.slot_name}") + edges.append(AgentResourceSlotEdgeGQL(node=node, cursor=cursor)) + + page_info = strawberry.relay.PageInfo( + has_next_page=False, + has_previous_page=False, + start_cursor=edges[0].cursor if edges else None, + end_cursor=edges[-1].cursor if edges else None, + ) + + return AgentResourceConnectionGQL( + count=len(edges), + edges=edges, + page_info=page_info, + ) + + +async def fetch_agent_resource_slot( + info: Info[StrawberryGQLContext], + agent_id: str, + slot_name: str, +) -> AgentResourceSlotGQL | None: + """Fetch a single per-slot resource entry for an agent (used by Node resolution).""" + action_result = ( + await info.context.processors.resource_slot.get_agent_resources.wait_for_complete( + GetAgentResourcesAction(agent_id=agent_id) + ) + ) + for data in action_result.items: + if data.slot_name == slot_name: + return AgentResourceSlotGQL.from_data(data) + return None + + +async def fetch_kernel_allocations( + info: Info[StrawberryGQLContext], + kernel_id: str, +) -> ResourceAllocationConnectionGQL: + """Fetch all per-slot allocation entries for a kernel (shared for KernelV2GQL connection).""" + import uuid + + action_result = ( + await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( + GetKernelAllocationsAction(kernel_id=uuid.UUID(kernel_id)) + ) + ) + + edges = [] + for data in action_result.items: + node = KernelResourceAllocationGQL.from_data(data) + cursor = encode_cursor(f"{data.kernel_id}:{data.slot_name}") + edges.append(KernelResourceAllocationEdgeGQL(node=node, cursor=cursor)) + + page_info = strawberry.relay.PageInfo( + has_next_page=False, + has_previous_page=False, + start_cursor=edges[0].cursor if edges else None, + end_cursor=edges[-1].cursor if edges else None, + ) + + return ResourceAllocationConnectionGQL( + count=len(edges), + edges=edges, + page_info=page_info, + ) + + +async def fetch_kernel_resource_allocation( + info: Info[StrawberryGQLContext], + kernel_id_str: str, + slot_name: str, +) -> KernelResourceAllocationGQL | None: + """Fetch a single per-slot allocation for a kernel (used by Node resolution).""" + import uuid + + action_result = ( + await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( + GetKernelAllocationsAction(kernel_id=uuid.UUID(kernel_id_str)) + ) + ) + for data in action_result.items: + if data.slot_name == slot_name: + return KernelResourceAllocationGQL.from_data(data) + return None diff --git a/src/ai/backend/manager/api/gql/resource_slot/resolver.py b/src/ai/backend/manager/api/gql/resource_slot/resolver.py new file mode 100644 index 00000000000..c2ce34608cd --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/resolver.py @@ -0,0 +1,28 @@ +"""Root query resolvers for resource slot type queries.""" + +from __future__ import annotations + +import strawberry +from strawberry import Info + +from ai.backend.manager.api.gql.types import StrawberryGQLContext + +from .fetcher import fetch_resource_slot_type, fetch_resource_slot_types +from .types import ResourceSlotTypeConnectionGQL, ResourceSlotTypeGQL + + +@strawberry.field( + description="Added in 26.4.0. Returns a single resource slot type by slot_name, or null." +) # type: ignore[misc] +async def resource_slot_type( + info: Info[StrawberryGQLContext], + slot_name: str, +) -> ResourceSlotTypeGQL | None: + return await fetch_resource_slot_type(info, slot_name) + + +@strawberry.field(description="Added in 26.4.0. Returns all registered resource slot types.") # type: ignore[misc] +async def resource_slot_types( + info: Info[StrawberryGQLContext], +) -> ResourceSlotTypeConnectionGQL: + return await fetch_resource_slot_types(info) diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py new file mode 100644 index 00000000000..d45c3d7d4f7 --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -0,0 +1,262 @@ +"""GraphQL types for resource slot management. + +Covers: +- ResourceSlotTypeGQL: Registry node for a known resource slot type (resource_slot_types table) +- AgentResourceSlotGQL: Per-slot capacity/usage on an agent (agent_resources table) +- KernelResourceAllocationGQL: Per-slot allocation for a kernel (resource_allocations table) +""" + +from __future__ import annotations + +from collections.abc import Iterable +from decimal import Decimal +from typing import Any, Self + +import strawberry +from strawberry import ID, Info +from strawberry.relay import Connection, Edge, Node, NodeID + +from ai.backend.manager.api.gql.types import StrawberryGQLContext +from ai.backend.manager.api.gql.utils import dedent_strip +from ai.backend.manager.data.resource_slot.types import ( + AgentResourceData, + NumberFormatData, + ResourceAllocationData, + ResourceSlotTypeData, +) + +# ========== NumberFormat ========== + + +@strawberry.type( + name="NumberFormat", + description="Added in 26.4.0. Display number format configuration for a resource slot type.", +) +class NumberFormatGQL: + binary: bool = strawberry.field( + description="Whether to use binary (1024-based) prefix instead of decimal (1000-based)." + ) + round_length: int = strawberry.field(description="Number of decimal places to display.") + + @classmethod + def from_data(cls, data: NumberFormatData) -> Self: + return cls(binary=data.binary, round_length=data.round_length) + + +# ========== ResourceSlotTypeGQL (Node) ========== + + +@strawberry.type( + name="ResourceSlotType", + description=dedent_strip(""" + Added in 26.4.0. A registered resource slot type describing display metadata + and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). + """), +) +class ResourceSlotTypeGQL(Node): + id: NodeID[str] + slot_name: str = strawberry.field( + description="Unique identifier for the resource slot (e.g., 'cpu', 'mem', 'cuda.device')." + ) + slot_type: str = strawberry.field( + description="Category of the slot type (e.g., 'count', 'bytes', 'unique-count')." + ) + display_name: str = strawberry.field(description="Human-readable name for display in UIs.") + description: str = strawberry.field( + description="Longer description of what this resource slot represents." + ) + display_unit: str = strawberry.field( + description="Unit label used when displaying resource amounts (e.g., 'GiB', 'cores')." + ) + display_icon: str = strawberry.field( + description="Icon identifier for UI rendering (e.g., 'cpu', 'memory', 'gpu')." + ) + number_format: NumberFormatGQL = strawberry.field( + description="Number formatting rules (binary vs decimal prefix, rounding)." + ) + rank: int = strawberry.field(description="Display ordering rank. Lower values appear first.") + + @classmethod + async def resolve_nodes( # type: ignore[override] + cls, + *, + info: Info[StrawberryGQLContext], + node_ids: Iterable[str], + required: bool = False, + ) -> Iterable[Self | None]: + from ai.backend.manager.api.gql.resource_slot.fetcher import fetch_resource_slot_type + + results = [] + for slot_name in node_ids: + node = await fetch_resource_slot_type(info, slot_name) + results.append(node) + return results + + @classmethod + def from_data(cls, data: ResourceSlotTypeData) -> Self: + return cls( + id=ID(data.slot_name), + slot_name=data.slot_name, + slot_type=data.slot_type, + display_name=data.display_name, + description=data.description, + display_unit=data.display_unit, + display_icon=data.display_icon, + number_format=NumberFormatGQL.from_data(data.number_format), + rank=data.rank, + ) + + +ResourceSlotTypeEdgeGQL = Edge[ResourceSlotTypeGQL] + + +@strawberry.type( + name="ResourceSlotTypeConnection", + description="Added in 26.4.0. Relay-style connection for paginated resource slot types.", +) +class ResourceSlotTypeConnectionGQL(Connection[ResourceSlotTypeGQL]): + count: int + + def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.count = count + + +# ========== AgentResourceSlotGQL (Node) ========== + + +@strawberry.type( + name="AgentResourceSlot", + description=dedent_strip(""" + Added in 26.4.0. Per-slot resource capacity and usage entry for an agent. + Represents one row from the agent_resources table. + """), +) +class AgentResourceSlotGQL(Node): + """Per-agent, per-slot resource capacity and usage.""" + + id: NodeID[str] + slot_name: str = strawberry.field( + description="Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device')." + ) + capacity: Decimal = strawberry.field( + description="Total hardware resource capacity for this slot on the agent." + ) + used: Decimal = strawberry.field( + description="Amount of this slot currently consumed by running and scheduled sessions." + ) + + @classmethod + async def resolve_nodes( # type: ignore[override] + cls, + *, + info: Info[StrawberryGQLContext], + node_ids: Iterable[str], + required: bool = False, + ) -> Iterable[Self | None]: + # Node ID format: "{agent_id}:{slot_name}" + results = [] + for node_id in node_ids: + agent_id, _, slot_name = node_id.partition(":") + from ai.backend.manager.api.gql.resource_slot.fetcher import ( + fetch_agent_resource_slot, + ) + + node = await fetch_agent_resource_slot(info, agent_id, slot_name) + results.append(node) + return results + + @classmethod + def from_data(cls, data: AgentResourceData) -> Self: + node_id = f"{data.agent_id}:{data.slot_name}" + return cls( + id=ID(node_id), + slot_name=data.slot_name, + capacity=data.capacity, + used=data.used, + ) + + +AgentResourceSlotEdgeGQL = Edge[AgentResourceSlotGQL] + + +@strawberry.type( + name="AgentResourceConnection", + description="Added in 26.4.0. Relay-style connection for per-slot agent resources.", +) +class AgentResourceConnectionGQL(Connection[AgentResourceSlotGQL]): + count: int + + def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.count = count + + +# ========== KernelResourceAllocationGQL (Node) ========== + + +@strawberry.type( + name="KernelResourceAllocation", + description=dedent_strip(""" + Added in 26.4.0. Per-slot resource allocation entry for a kernel. + Represents one row from the resource_allocations table. + """), +) +class KernelResourceAllocationGQL(Node): + """Per-kernel, per-slot resource allocation.""" + + id: NodeID[str] + slot_name: str = strawberry.field( + description="Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device')." + ) + requested: Decimal = strawberry.field( + description="Amount of this resource slot originally requested for the kernel." + ) + used: Decimal | None = strawberry.field( + description="Amount currently used. May be null if not yet measured." + ) + + @classmethod + async def resolve_nodes( # type: ignore[override] + cls, + *, + info: Info[StrawberryGQLContext], + node_ids: Iterable[str], + required: bool = False, + ) -> Iterable[Self | None]: + # Node ID format: "{kernel_id}:{slot_name}" + results = [] + for node_id in node_ids: + kernel_id_str, _, slot_name = node_id.partition(":") + from ai.backend.manager.api.gql.resource_slot.fetcher import ( + fetch_kernel_resource_allocation, + ) + + node = await fetch_kernel_resource_allocation(info, kernel_id_str, slot_name) + results.append(node) + return results + + @classmethod + def from_data(cls, data: ResourceAllocationData) -> Self: + node_id = f"{data.kernel_id}:{data.slot_name}" + return cls( + id=ID(node_id), + slot_name=data.slot_name, + requested=data.requested, + used=data.used, + ) + + +KernelResourceAllocationEdgeGQL = Edge[KernelResourceAllocationGQL] + + +@strawberry.type( + name="ResourceAllocationConnection", + description="Added in 26.4.0. Relay-style connection for per-slot kernel resource allocations.", +) +class ResourceAllocationConnectionGQL(Connection[KernelResourceAllocationGQL]): + count: int + + def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.count = count diff --git a/src/ai/backend/manager/api/gql/schema.py b/src/ai/backend/manager/api/gql/schema.py index 70852ca2dc6..2c73cfe6d78 100644 --- a/src/ai/backend/manager/api/gql/schema.py +++ b/src/ai/backend/manager/api/gql/schema.py @@ -197,6 +197,7 @@ resource_groups, update_resource_group_fair_share_spec, ) +from .resource_slot.resolver import resource_slot_type, resource_slot_types from .resource_usage import ( admin_domain_usage_buckets, admin_project_usage_buckets, @@ -306,6 +307,8 @@ class Query: admin_images_v2 = admin_images_v2 admin_kernels_v2 = admin_kernels_v2 admin_sessions_v2 = admin_sessions_v2 + resource_slot_type = resource_slot_type + resource_slot_types = resource_slot_types admin_image_aliases = admin_image_aliases # RBAC Admin APIs admin_role = admin_role diff --git a/src/ai/backend/manager/services/resource_slot/actions/__init__.py b/src/ai/backend/manager/services/resource_slot/actions/__init__.py index 656154a698d..b4ea4cf0eb6 100644 --- a/src/ai/backend/manager/services/resource_slot/actions/__init__.py +++ b/src/ai/backend/manager/services/resource_slot/actions/__init__.py @@ -1,3 +1,4 @@ +from .all_slot_types import AllSlotTypesAction, AllSlotTypesResult from .get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .get_domain_resource_overview import ( GetDomainResourceOverviewAction, @@ -17,6 +18,8 @@ from .search_resource_slot_types import SearchResourceSlotTypesAction, SearchResourceSlotTypesResult __all__ = ( + "AllSlotTypesAction", + "AllSlotTypesResult", "GetAgentResourcesAction", "GetAgentResourcesResult", "GetDomainResourceOverviewAction", diff --git a/src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py b/src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py new file mode 100644 index 00000000000..581d61d847c --- /dev/null +++ b/src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import EntityType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.resource_slot.types import ResourceSlotTypeData + +from .base import ResourceSlotAction + + +@dataclass +class AllSlotTypesAction(ResourceSlotAction): + @override + @classmethod + def entity_type(cls) -> EntityType: + return EntityType.RESOURCE_SLOT_TYPE + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.SEARCH + + @override + def entity_id(self) -> str | None: + return None + + +@dataclass +class AllSlotTypesResult(BaseActionResult): + items: list[ResourceSlotTypeData] + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/resource_slot/processors.py b/src/ai/backend/manager/services/resource_slot/processors.py index db836645204..d9334f0ba41 100644 --- a/src/ai/backend/manager/services/resource_slot/processors.py +++ b/src/ai/backend/manager/services/resource_slot/processors.py @@ -7,6 +7,8 @@ from ai.backend.manager.actions.types import AbstractProcessorPackage, ActionSpec from .actions import ( + AllSlotTypesAction, + AllSlotTypesResult, GetAgentResourcesAction, GetAgentResourcesResult, GetDomainResourceOverviewAction, @@ -28,6 +30,7 @@ class ResourceSlotProcessors(AbstractProcessorPackage): + all_slot_types: ActionProcessor[AllSlotTypesAction, AllSlotTypesResult] get_agent_resources: ActionProcessor[GetAgentResourcesAction, GetAgentResourcesResult] search_agent_resources: ActionProcessor[SearchAgentResourcesAction, SearchAgentResourcesResult] get_kernel_allocations: ActionProcessor[GetKernelAllocationsAction, GetKernelAllocationsResult] @@ -46,6 +49,7 @@ class ResourceSlotProcessors(AbstractProcessorPackage): ] def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMonitor]) -> None: + self.all_slot_types = ActionProcessor(service.all_slot_types, action_monitors) self.get_agent_resources = ActionProcessor(service.get_agent_resources, action_monitors) self.search_agent_resources = ActionProcessor( service.search_agent_resources, action_monitors @@ -72,6 +76,7 @@ def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMon @override def supported_actions(self) -> list[ActionSpec]: return [ + AllSlotTypesAction.spec(), GetAgentResourcesAction.spec(), SearchAgentResourcesAction.spec(), GetKernelAllocationsAction.spec(), diff --git a/src/ai/backend/manager/services/resource_slot/service.py b/src/ai/backend/manager/services/resource_slot/service.py index bc771877f79..62e955754c8 100644 --- a/src/ai/backend/manager/services/resource_slot/service.py +++ b/src/ai/backend/manager/services/resource_slot/service.py @@ -9,6 +9,7 @@ ) from ai.backend.manager.repositories.resource_slot.repository import ResourceSlotRepository +from .actions.all_slot_types import AllSlotTypesAction, AllSlotTypesResult from .actions.get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .actions.get_domain_resource_overview import ( GetDomainResourceOverviewAction, @@ -37,6 +38,26 @@ class ResourceSlotService: def __init__(self, repository: ResourceSlotRepository) -> None: self._repository = repository + async def all_slot_types(self, action: AllSlotTypesAction) -> AllSlotTypesResult: + rows = await self._repository.all_slot_types() + items = [ + ResourceSlotTypeData( + slot_name=row.slot_name, + slot_type=row.slot_type, + display_name=row.display_name, + description=row.description, + display_unit=row.display_unit, + display_icon=row.display_icon, + number_format=NumberFormatData( + binary=row.number_format.binary, + round_length=row.number_format.round_length, + ), + rank=row.rank, + ) + for row in rows + ] + return AllSlotTypesResult(items=items) + async def get_agent_resources(self, action: GetAgentResourcesAction) -> GetAgentResourcesResult: rows = await self._repository.get_agent_resources(action.agent_id) items = [ From 4731d2f50d334e17ab680735a4416e56f45c2b5d Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 00:52:49 +0900 Subject: [PATCH 02/17] fix(BA-4904): fix mypy return type in resolve_nodes by using cls.from_data() Replace fetcher-returning-GQL-type pattern in resolve_nodes with data-returning helpers + cls.from_data() calls, following the established pattern in AgentV2GQL. This satisfies mypy's Iterable[Self | None] constraint. Co-Authored-By: Claude Sonnet 4.6 --- .../manager/api/gql/resource_slot/fetcher.py | 79 +++++++++++++++++-- .../manager/api/gql/resource_slot/types.py | 32 ++++---- 2 files changed, 87 insertions(+), 24 deletions(-) diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index ea503f23c50..e341d392a4c 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -7,11 +7,18 @@ from __future__ import annotations +import uuid as _uuid + import strawberry from strawberry import Info from ai.backend.manager.api.gql.base import encode_cursor from ai.backend.manager.api.gql.types import StrawberryGQLContext +from ai.backend.manager.data.resource_slot.types import ( + AgentResourceData, + ResourceAllocationData, + ResourceSlotTypeData, +) from ai.backend.manager.errors.resource_slot import ResourceSlotTypeNotFound from ai.backend.manager.services.resource_slot.actions.all_slot_types import AllSlotTypesAction from ai.backend.manager.services.resource_slot.actions.get_agent_resources import ( @@ -134,11 +141,9 @@ async def fetch_kernel_allocations( kernel_id: str, ) -> ResourceAllocationConnectionGQL: """Fetch all per-slot allocation entries for a kernel (shared for KernelV2GQL connection).""" - import uuid - action_result = ( await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( - GetKernelAllocationsAction(kernel_id=uuid.UUID(kernel_id)) + GetKernelAllocationsAction(kernel_id=_uuid.UUID(kernel_id)) ) ) @@ -168,14 +173,76 @@ async def fetch_kernel_resource_allocation( slot_name: str, ) -> KernelResourceAllocationGQL | None: """Fetch a single per-slot allocation for a kernel (used by Node resolution).""" - import uuid - action_result = ( await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( - GetKernelAllocationsAction(kernel_id=uuid.UUID(kernel_id_str)) + GetKernelAllocationsAction(kernel_id=_uuid.UUID(kernel_id_str)) ) ) for data in action_result.items: if data.slot_name == slot_name: return KernelResourceAllocationGQL.from_data(data) return None + + +# ========== Raw data helpers for Node.resolve_nodes ========== +# These return raw data types so that resolve_nodes can call cls.from_data(), +# which enables mypy to correctly infer the return type as Iterable[Self | None]. + + +async def load_resource_slot_type_data( + info: Info[StrawberryGQLContext], + slot_name: str, +) -> ResourceSlotTypeData | None: + """Load raw ResourceSlotTypeData for a single slot_name (used by Node.resolve_nodes).""" + try: + action_result = ( + await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( + GetResourceSlotTypeAction(slot_name=slot_name) + ) + ) + except ResourceSlotTypeNotFound: + return None + return ResourceSlotTypeData( + slot_name=action_result.item.slot_name, + slot_type=action_result.item.slot_type, + display_name=action_result.item.display_name, + description=action_result.item.description, + display_unit=action_result.item.display_unit, + display_icon=action_result.item.display_icon, + number_format=action_result.item.number_format, + rank=action_result.item.rank, + ) + + +async def load_agent_resource_data( + info: Info[StrawberryGQLContext], + agent_id: str, + slot_name: str, +) -> AgentResourceData | None: + """Load raw AgentResourceData for a single agent+slot (used by Node.resolve_nodes).""" + action_result = ( + await info.context.processors.resource_slot.get_agent_resources.wait_for_complete( + GetAgentResourcesAction(agent_id=agent_id) + ) + ) + for data in action_result.items: + if data.slot_name == slot_name: + return data + return None + + +async def load_kernel_allocation_data( + info: Info[StrawberryGQLContext], + kernel_id_str: str, + slot_name: str, +) -> ResourceAllocationData | None: + """Load raw ResourceAllocationData for a single kernel+slot (used by Node.resolve_nodes).""" + action_result = ( + await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( + GetKernelAllocationsAction(kernel_id=_uuid.UUID(kernel_id_str)) + ) + ) + for data in action_result.items: + if data.slot_name == slot_name: + return data + return None diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py index d45c3d7d4f7..6b8c2f07d9e 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/types.py +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -84,12 +84,12 @@ async def resolve_nodes( # type: ignore[override] node_ids: Iterable[str], required: bool = False, ) -> Iterable[Self | None]: - from ai.backend.manager.api.gql.resource_slot.fetcher import fetch_resource_slot_type + from ai.backend.manager.api.gql.resource_slot.fetcher import load_resource_slot_type_data - results = [] + results: list[Self | None] = [] for slot_name in node_ids: - node = await fetch_resource_slot_type(info, slot_name) - results.append(node) + data = await load_resource_slot_type_data(info, slot_name) + results.append(cls.from_data(data) if data is not None else None) return results @classmethod @@ -155,15 +155,13 @@ async def resolve_nodes( # type: ignore[override] required: bool = False, ) -> Iterable[Self | None]: # Node ID format: "{agent_id}:{slot_name}" - results = [] + from ai.backend.manager.api.gql.resource_slot.fetcher import load_agent_resource_data + + results: list[Self | None] = [] for node_id in node_ids: agent_id, _, slot_name = node_id.partition(":") - from ai.backend.manager.api.gql.resource_slot.fetcher import ( - fetch_agent_resource_slot, - ) - - node = await fetch_agent_resource_slot(info, agent_id, slot_name) - results.append(node) + data = await load_agent_resource_data(info, agent_id, slot_name) + results.append(cls.from_data(data) if data is not None else None) return results @classmethod @@ -225,15 +223,13 @@ async def resolve_nodes( # type: ignore[override] required: bool = False, ) -> Iterable[Self | None]: # Node ID format: "{kernel_id}:{slot_name}" - results = [] + from ai.backend.manager.api.gql.resource_slot.fetcher import load_kernel_allocation_data + + results: list[Self | None] = [] for node_id in node_ids: kernel_id_str, _, slot_name = node_id.partition(":") - from ai.backend.manager.api.gql.resource_slot.fetcher import ( - fetch_kernel_resource_allocation, - ) - - node = await fetch_kernel_resource_allocation(info, kernel_id_str, slot_name) - results.append(node) + data = await load_kernel_allocation_data(info, kernel_id_str, slot_name) + results.append(cls.from_data(data) if data is not None else None) return results @classmethod From 65093d2152bf7a5a08dd07a25ddcac3c6efaa8fc Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 00:54:25 +0900 Subject: [PATCH 03/17] changelog: add news fragment for PR #9708 --- changes/9708.feature.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/9708.feature.md diff --git a/changes/9708.feature.md b/changes/9708.feature.md new file mode 100644 index 00000000000..fdc7e022732 --- /dev/null +++ b/changes/9708.feature.md @@ -0,0 +1 @@ +Add GraphQL ResourceSlotTypeGQL node with root queries (resource_slot_type, resource_slot_types) and relay connections on AgentV2GQL (resource_slots) and KernelV2GQL (resource_allocations). From 70b0df2d4cd2cccf2222133da80725e05139b8fe Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 10:22:28 +0900 Subject: [PATCH 04/17] fix: let ResourceSlotTypeNotFound raise instead of silently returning None Fetcher functions now propagate the exception so GraphQL returns error info to the user. resolve_nodes still catches it to comply with the relay spec (Iterable[Self | None]). Co-Authored-By: Claude Opus 4.6 --- .../manager/api/gql/resource_slot/fetcher.py | 27 +++++++------------ .../manager/api/gql/resource_slot/types.py | 9 +++++-- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index e341d392a4c..2c16775f28d 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -19,7 +19,6 @@ ResourceAllocationData, ResourceSlotTypeData, ) -from ai.backend.manager.errors.resource_slot import ResourceSlotTypeNotFound from ai.backend.manager.services.resource_slot.actions.all_slot_types import AllSlotTypesAction from ai.backend.manager.services.resource_slot.actions.get_agent_resources import ( GetAgentResourcesAction, @@ -75,16 +74,13 @@ async def fetch_resource_slot_types( async def fetch_resource_slot_type( info: Info[StrawberryGQLContext], slot_name: str, -) -> ResourceSlotTypeGQL | None: +) -> ResourceSlotTypeGQL: """Fetch a single resource slot type by slot_name (used by Node resolution and root query).""" - try: - action_result = ( - await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( - GetResourceSlotTypeAction(slot_name=slot_name) - ) + action_result = ( + await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( + GetResourceSlotTypeAction(slot_name=slot_name) ) - except ResourceSlotTypeNotFound: - return None + ) return ResourceSlotTypeGQL.from_data(action_result.item) @@ -192,16 +188,13 @@ async def fetch_kernel_resource_allocation( async def load_resource_slot_type_data( info: Info[StrawberryGQLContext], slot_name: str, -) -> ResourceSlotTypeData | None: +) -> ResourceSlotTypeData: """Load raw ResourceSlotTypeData for a single slot_name (used by Node.resolve_nodes).""" - try: - action_result = ( - await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( - GetResourceSlotTypeAction(slot_name=slot_name) - ) + action_result = ( + await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( + GetResourceSlotTypeAction(slot_name=slot_name) ) - except ResourceSlotTypeNotFound: - return None + ) return ResourceSlotTypeData( slot_name=action_result.item.slot_name, slot_type=action_result.item.slot_type, diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py index 6b8c2f07d9e..47ff03f7820 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/types.py +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -85,11 +85,16 @@ async def resolve_nodes( # type: ignore[override] required: bool = False, ) -> Iterable[Self | None]: from ai.backend.manager.api.gql.resource_slot.fetcher import load_resource_slot_type_data + from ai.backend.manager.errors.resource_slot import ResourceSlotTypeNotFound results: list[Self | None] = [] for slot_name in node_ids: - data = await load_resource_slot_type_data(info, slot_name) - results.append(cls.from_data(data) if data is not None else None) + try: + data = await load_resource_slot_type_data(info, slot_name) + except ResourceSlotTypeNotFound: + results.append(None) + else: + results.append(cls.from_data(data)) return results @classmethod From fa417675816340b79cf4af872b52fe58b083da21 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 01:25:45 +0000 Subject: [PATCH 05/17] chore: update api schema dump Co-authored-by: octodog --- .../graphql-reference/supergraph.graphql | 186 ++++++++++++++++++ .../graphql-reference/v2-schema.graphql | 163 +++++++++++++++ 2 files changed, 349 insertions(+) diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 2c23c80eb3b..936a1b7145f 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -339,6 +339,52 @@ type AgentResource free: JSON! } +"""Added in 26.4.0. Relay-style connection for per-slot agent resources.""" +type AgentResourceConnection + @join__type(graph: STRAWBERRY) +{ + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [AgentResourceSlotEdge!]! + count: Int! +} + +""" +Added in 26.4.0. Per-slot resource capacity and usage entry for an agent. +Represents one row from the agent_resources table. +""" +type AgentResourceSlot implements Node + @join__implements(graph: STRAWBERRY, interface: "Node") + @join__type(graph: STRAWBERRY) +{ + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Total hardware resource capacity for this slot on the agent.""" + capacity: Decimal! + + """ + Amount of this slot currently consumed by running and scheduled sessions. + """ + used: Decimal! +} + +"""An edge in a connection.""" +type AgentResourceSlotEdge + @join__type(graph: STRAWBERRY) +{ + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: AgentResourceSlot! +} + """Added in 25.15.0""" type AgentStats @join__type(graph: STRAWBERRY) @@ -525,6 +571,9 @@ type AgentV2 implements Node Added in 26.3.0. List of sessions running on this agent with pagination support. """ sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! + + """Added in 26.4.0. Per-slot resource capacity and usage for this agent.""" + resourceSlots: AgentResourceConnection! } """ @@ -5443,6 +5492,38 @@ type KernelNode implements Node preopen_ports: [Int] } +""" +Added in 26.4.0. Per-slot resource allocation entry for a kernel. +Represents one row from the resource_allocations table. +""" +type KernelResourceAllocation implements Node + @join__implements(graph: STRAWBERRY, interface: "Node") + @join__type(graph: STRAWBERRY) +{ + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Amount of this resource slot originally requested for the kernel.""" + requested: Decimal! + + """Amount currently used. May be null if not yet measured.""" + used: Decimal +} + +"""An edge in a connection.""" +type KernelResourceAllocationEdge + @join__type(graph: STRAWBERRY) +{ + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: KernelResourceAllocation! +} + """ Added in 26.2.0. Represents a kernel (compute container) in Backend.AI. """ @@ -5491,6 +5572,9 @@ type KernelV2 implements Node """Added in 26.3.0. The session this kernel belongs to.""" session: SessionV2 + + """Added in 26.4.0. Per-slot resource allocation for this kernel.""" + resourceAllocations: ResourceAllocationConnection! } """ @@ -7740,6 +7824,21 @@ enum NotificationRuleType ENDPOINT_LIFECYCLE_CHANGED @join__enumValue(graph: STRAWBERRY) } +""" +Added in 26.4.0. Display number format configuration for a resource slot type. +""" +type NumberFormat + @join__type(graph: STRAWBERRY) +{ + """ + Whether to use binary (1024-based) prefix instead of decimal (1000-based). + """ + binary: Boolean! + + """Number of decimal places to display.""" + roundLength: Int! +} + """Added in 25.14.0""" type ObjectStorage implements Node @join__implements(graph: STRAWBERRY, interface: "Node") @@ -9329,6 +9428,14 @@ type Query """ adminSessionsV2(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! @join__field(graph: STRAWBERRY) + """ + Added in 26.4.0. Returns a single resource slot type by slot_name, or null. + """ + resourceSlotType(slotName: String!): ResourceSlotType @join__field(graph: STRAWBERRY) + + """Added in 26.4.0. Returns all registered resource slot types.""" + resourceSlotTypes: ResourceSlotTypeConnection! @join__field(graph: STRAWBERRY) + """ Added in 26.2.0. @@ -9829,6 +9936,20 @@ type ResourceAllocation used: ResourceSlot } +""" +Added in 26.4.0. Relay-style connection for per-slot kernel resource allocations. +""" +type ResourceAllocationConnection + @join__type(graph: STRAWBERRY) +{ + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [KernelResourceAllocationEdge!]! + count: Int! +} + type ResourceConfig @join__type(graph: STRAWBERRY) { @@ -10185,6 +10306,71 @@ input ResourceSlotInput entries: [ResourceSlotEntryInput!]! } +""" +Added in 26.4.0. A registered resource slot type describing display metadata +and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). +""" +type ResourceSlotType implements Node + @join__implements(graph: STRAWBERRY, interface: "Node") + @join__type(graph: STRAWBERRY) +{ + """The Globally Unique ID of this object""" + id: ID! + + """ + Unique identifier for the resource slot (e.g., 'cpu', 'mem', 'cuda.device'). + """ + slotName: String! + + """Category of the slot type (e.g., 'count', 'bytes', 'unique-count').""" + slotType: String! + + """Human-readable name for display in UIs.""" + displayName: String! + + """Longer description of what this resource slot represents.""" + description: String! + + """ + Unit label used when displaying resource amounts (e.g., 'GiB', 'cores'). + """ + displayUnit: String! + + """Icon identifier for UI rendering (e.g., 'cpu', 'memory', 'gpu').""" + displayIcon: String! + + """Number formatting rules (binary vs decimal prefix, rounding).""" + numberFormat: NumberFormat! + + """Display ordering rank. Lower values appear first.""" + rank: Int! +} + +""" +Added in 26.4.0. Relay-style connection for paginated resource slot types. +""" +type ResourceSlotTypeConnection + @join__type(graph: STRAWBERRY) +{ + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [ResourceSlotTypeEdge!]! + count: Int! +} + +"""An edge in a connection.""" +type ResourceSlotTypeEdge + @join__type(graph: STRAWBERRY) +{ + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: ResourceSlotType! +} + """ Added in 26.2.0. Resource weight with default indicator. Shows whether this resource type's weight was explicitly set or uses default. """ diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index 3a7d9e6167d..ca3ebf5e130 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -170,6 +170,45 @@ type AgentResource { free: JSON! } +"""Added in 26.4.0. Relay-style connection for per-slot agent resources.""" +type AgentResourceConnection { + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [AgentResourceSlotEdge!]! + count: Int! +} + +""" +Added in 26.4.0. Per-slot resource capacity and usage entry for an agent. +Represents one row from the agent_resources table. +""" +type AgentResourceSlot implements Node { + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Total hardware resource capacity for this slot on the agent.""" + capacity: Decimal! + + """ + Amount of this slot currently consumed by running and scheduled sessions. + """ + used: Decimal! +} + +"""An edge in a connection.""" +type AgentResourceSlotEdge { + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: AgentResourceSlot! +} + """Added in 25.15.0""" type AgentStats { """Added in 25.15.0""" @@ -321,6 +360,9 @@ type AgentV2 implements Node { Added in 26.3.0. List of sessions running on this agent with pagination support. """ sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! + + """Added in 26.4.0. Per-slot resource capacity and usage for this agent.""" + resourceSlots: AgentResourceConnection! } """ @@ -3016,6 +3058,33 @@ The `JSON` scalar type represents JSON values as specified by [ECMA-404](https:/ """ scalar JSON @specifiedBy(url: "https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf") +""" +Added in 26.4.0. Per-slot resource allocation entry for a kernel. +Represents one row from the resource_allocations table. +""" +type KernelResourceAllocation implements Node { + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Amount of this resource slot originally requested for the kernel.""" + requested: Decimal! + + """Amount currently used. May be null if not yet measured.""" + used: Decimal +} + +"""An edge in a connection.""" +type KernelResourceAllocationEdge { + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: KernelResourceAllocation! +} + """ Added in 26.2.0. Represents a kernel (compute container) in Backend.AI. """ @@ -3061,6 +3130,9 @@ type KernelV2 implements Node { """Added in 26.3.0. The session this kernel belongs to.""" session: SessionV2 + + """Added in 26.4.0. Per-slot resource allocation for this kernel.""" + resourceAllocations: ResourceAllocationConnection! } """ @@ -4090,6 +4162,19 @@ enum NotificationRuleType { ENDPOINT_LIFECYCLE_CHANGED } +""" +Added in 26.4.0. Display number format configuration for a resource slot type. +""" +type NumberFormat { + """ + Whether to use binary (1024-based) prefix instead of decimal (1000-based). + """ + binary: Boolean! + + """Number of decimal places to display.""" + roundLength: Int! +} + """Added in 25.14.0""" type ObjectStorage implements Node { """The Globally Unique ID of this object""" @@ -5056,6 +5141,14 @@ type Query { """ adminSessionsV2(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! + """ + Added in 26.4.0. Returns a single resource slot type by slot_name, or null. + """ + resourceSlotType(slotName: String!): ResourceSlotType + + """Added in 26.4.0. Returns all registered resource slot types.""" + resourceSlotTypes: ResourceSlotTypeConnection! + """ Added in 26.2.0. @@ -5560,6 +5653,18 @@ type ResourceAllocation { used: ResourceSlot } +""" +Added in 26.4.0. Relay-style connection for per-slot kernel resource allocations. +""" +type ResourceAllocationConnection { + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [KernelResourceAllocationEdge!]! + count: Int! +} + type ResourceConfig { """ Added in 26.1.0. Allocated compute resources including CPU, memory, and accelerators. @@ -5834,6 +5939,64 @@ input ResourceSlotInput { entries: [ResourceSlotEntryInput!]! } +""" +Added in 26.4.0. A registered resource slot type describing display metadata +and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). +""" +type ResourceSlotType implements Node { + """The Globally Unique ID of this object""" + id: ID! + + """ + Unique identifier for the resource slot (e.g., 'cpu', 'mem', 'cuda.device'). + """ + slotName: String! + + """Category of the slot type (e.g., 'count', 'bytes', 'unique-count').""" + slotType: String! + + """Human-readable name for display in UIs.""" + displayName: String! + + """Longer description of what this resource slot represents.""" + description: String! + + """ + Unit label used when displaying resource amounts (e.g., 'GiB', 'cores'). + """ + displayUnit: String! + + """Icon identifier for UI rendering (e.g., 'cpu', 'memory', 'gpu').""" + displayIcon: String! + + """Number formatting rules (binary vs decimal prefix, rounding).""" + numberFormat: NumberFormat! + + """Display ordering rank. Lower values appear first.""" + rank: Int! +} + +""" +Added in 26.4.0. Relay-style connection for paginated resource slot types. +""" +type ResourceSlotTypeConnection { + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [ResourceSlotTypeEdge!]! + count: Int! +} + +"""An edge in a connection.""" +type ResourceSlotTypeEdge { + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: ResourceSlotType! +} + """ Added in 26.2.0. Resource weight with default indicator. Shows whether this resource type's weight was explicitly set or uses default. """ From 6efa2de9b0d016af355cbbf0ebac471c0cc0edc4 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 10:48:00 +0900 Subject: [PATCH 06/17] fix(BA-4904): change version strings from 26.4.0 to 26.3.0 Co-Authored-By: Claude Sonnet 4.6 --- src/ai/backend/manager/api/gql/agent/types.py | 2 +- src/ai/backend/manager/api/gql/kernel/types.py | 2 +- .../manager/api/gql/resource_slot/resolver.py | 4 ++-- .../backend/manager/api/gql/resource_slot/types.py | 14 +++++++------- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ai/backend/manager/api/gql/agent/types.py b/src/ai/backend/manager/api/gql/agent/types.py index 70f8f8fed38..cad67fbbc0e 100644 --- a/src/ai/backend/manager/api/gql/agent/types.py +++ b/src/ai/backend/manager/api/gql/agent/types.py @@ -491,7 +491,7 @@ async def sessions( ) @strawberry.field( # type: ignore[misc] - description="Added in 26.4.0. Per-slot resource capacity and usage for this agent." + description="Added in 26.3.0. Per-slot resource capacity and usage for this agent." ) async def resource_slots( self, diff --git a/src/ai/backend/manager/api/gql/kernel/types.py b/src/ai/backend/manager/api/gql/kernel/types.py index b39dbb3d7a9..52c1eb31ee1 100644 --- a/src/ai/backend/manager/api/gql/kernel/types.py +++ b/src/ai/backend/manager/api/gql/kernel/types.py @@ -466,7 +466,7 @@ async def session( raise NotImplementedError @strawberry.field( # type: ignore[misc] - description="Added in 26.4.0. Per-slot resource allocation for this kernel." + description="Added in 26.3.0. Per-slot resource allocation for this kernel." ) async def resource_allocations( self, diff --git a/src/ai/backend/manager/api/gql/resource_slot/resolver.py b/src/ai/backend/manager/api/gql/resource_slot/resolver.py index c2ce34608cd..5b7250c22e2 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/resolver.py +++ b/src/ai/backend/manager/api/gql/resource_slot/resolver.py @@ -12,7 +12,7 @@ @strawberry.field( - description="Added in 26.4.0. Returns a single resource slot type by slot_name, or null." + description="Added in 26.3.0. Returns a single resource slot type by slot_name, or null." ) # type: ignore[misc] async def resource_slot_type( info: Info[StrawberryGQLContext], @@ -21,7 +21,7 @@ async def resource_slot_type( return await fetch_resource_slot_type(info, slot_name) -@strawberry.field(description="Added in 26.4.0. Returns all registered resource slot types.") # type: ignore[misc] +@strawberry.field(description="Added in 26.3.0. Returns all registered resource slot types.") # type: ignore[misc] async def resource_slot_types( info: Info[StrawberryGQLContext], ) -> ResourceSlotTypeConnectionGQL: diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py index 47ff03f7820..7cd17fdf864 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/types.py +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -30,7 +30,7 @@ @strawberry.type( name="NumberFormat", - description="Added in 26.4.0. Display number format configuration for a resource slot type.", + description="Added in 26.3.0. Display number format configuration for a resource slot type.", ) class NumberFormatGQL: binary: bool = strawberry.field( @@ -49,7 +49,7 @@ def from_data(cls, data: NumberFormatData) -> Self: @strawberry.type( name="ResourceSlotType", description=dedent_strip(""" - Added in 26.4.0. A registered resource slot type describing display metadata + Added in 26.3.0. A registered resource slot type describing display metadata and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). """), ) @@ -117,7 +117,7 @@ def from_data(cls, data: ResourceSlotTypeData) -> Self: @strawberry.type( name="ResourceSlotTypeConnection", - description="Added in 26.4.0. Relay-style connection for paginated resource slot types.", + description="Added in 26.3.0. Relay-style connection for paginated resource slot types.", ) class ResourceSlotTypeConnectionGQL(Connection[ResourceSlotTypeGQL]): count: int @@ -133,7 +133,7 @@ def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: @strawberry.type( name="AgentResourceSlot", description=dedent_strip(""" - Added in 26.4.0. Per-slot resource capacity and usage entry for an agent. + Added in 26.3.0. Per-slot resource capacity and usage entry for an agent. Represents one row from the agent_resources table. """), ) @@ -185,7 +185,7 @@ def from_data(cls, data: AgentResourceData) -> Self: @strawberry.type( name="AgentResourceConnection", - description="Added in 26.4.0. Relay-style connection for per-slot agent resources.", + description="Added in 26.3.0. Relay-style connection for per-slot agent resources.", ) class AgentResourceConnectionGQL(Connection[AgentResourceSlotGQL]): count: int @@ -201,7 +201,7 @@ def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: @strawberry.type( name="KernelResourceAllocation", description=dedent_strip(""" - Added in 26.4.0. Per-slot resource allocation entry for a kernel. + Added in 26.3.0. Per-slot resource allocation entry for a kernel. Represents one row from the resource_allocations table. """), ) @@ -253,7 +253,7 @@ def from_data(cls, data: ResourceAllocationData) -> Self: @strawberry.type( name="ResourceAllocationConnection", - description="Added in 26.4.0. Relay-style connection for per-slot kernel resource allocations.", + description="Added in 26.3.0. Relay-style connection for per-slot kernel resource allocations.", ) class ResourceAllocationConnectionGQL(Connection[KernelResourceAllocationGQL]): count: int From 0cec971f6bf0d113379107a43fb224867235efd5 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 10:49:56 +0900 Subject: [PATCH 07/17] refactor(BA-4904): extract _row_to_slot_type_data helper in service.py Eliminate duplicated ResourceSlotTypeData construction between all_slot_types() and get_resource_slot_type() methods. Co-Authored-By: Claude Sonnet 4.6 --- .../manager/services/resource_slot/service.py | 49 +++++++------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/src/ai/backend/manager/services/resource_slot/service.py b/src/ai/backend/manager/services/resource_slot/service.py index 62e955754c8..f2fdac8eb7d 100644 --- a/src/ai/backend/manager/services/resource_slot/service.py +++ b/src/ai/backend/manager/services/resource_slot/service.py @@ -7,6 +7,7 @@ ResourceOccupancy, ResourceSlotTypeData, ) +from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow from ai.backend.manager.repositories.resource_slot.repository import ResourceSlotRepository from .actions.all_slot_types import AllSlotTypesAction, AllSlotTypesResult @@ -32,6 +33,22 @@ ) +def _row_to_slot_type_data(row: ResourceSlotTypeRow) -> ResourceSlotTypeData: + return ResourceSlotTypeData( + slot_name=row.slot_name, + slot_type=row.slot_type, + display_name=row.display_name, + description=row.description, + display_unit=row.display_unit, + display_icon=row.display_icon, + number_format=NumberFormatData( + binary=row.number_format.binary, + round_length=row.number_format.round_length, + ), + rank=row.rank, + ) + + class ResourceSlotService: _repository: ResourceSlotRepository @@ -40,22 +57,7 @@ def __init__(self, repository: ResourceSlotRepository) -> None: async def all_slot_types(self, action: AllSlotTypesAction) -> AllSlotTypesResult: rows = await self._repository.all_slot_types() - items = [ - ResourceSlotTypeData( - slot_name=row.slot_name, - slot_type=row.slot_type, - display_name=row.display_name, - description=row.description, - display_unit=row.display_unit, - display_icon=row.display_icon, - number_format=NumberFormatData( - binary=row.number_format.binary, - round_length=row.number_format.round_length, - ), - rank=row.rank, - ) - for row in rows - ] + items = [_row_to_slot_type_data(row) for row in rows] return AllSlotTypesResult(items=items) async def get_agent_resources(self, action: GetAgentResourcesAction) -> GetAgentResourcesResult: @@ -112,20 +114,7 @@ async def get_resource_slot_type( self, action: GetResourceSlotTypeAction ) -> GetResourceSlotTypeResult: row = await self._repository.get_slot_type(action.slot_name) - item = ResourceSlotTypeData( - slot_name=row.slot_name, - slot_type=row.slot_type, - display_name=row.display_name, - description=row.description, - display_unit=row.display_unit, - display_icon=row.display_icon, - number_format=NumberFormatData( - binary=row.number_format.binary, - round_length=row.number_format.round_length, - ), - rank=row.rank, - ) - return GetResourceSlotTypeResult(item=item) + return GetResourceSlotTypeResult(item=_row_to_slot_type_data(row)) async def search_resource_slot_types( self, action: SearchResourceSlotTypesAction From b9a0a58d8fc8becd36925aa65fa198e8012a12a1 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 01:53:10 +0000 Subject: [PATCH 08/17] chore: update api schema dump Co-authored-by: octodog --- .../graphql-reference/supergraph.graphql | 20 +++++++++---------- .../graphql-reference/v2-schema.graphql | 20 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 936a1b7145f..308bd0208fc 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -339,7 +339,7 @@ type AgentResource free: JSON! } -"""Added in 26.4.0. Relay-style connection for per-slot agent resources.""" +"""Added in 26.3.0. Relay-style connection for per-slot agent resources.""" type AgentResourceConnection @join__type(graph: STRAWBERRY) { @@ -352,7 +352,7 @@ type AgentResourceConnection } """ -Added in 26.4.0. Per-slot resource capacity and usage entry for an agent. +Added in 26.3.0. Per-slot resource capacity and usage entry for an agent. Represents one row from the agent_resources table. """ type AgentResourceSlot implements Node @@ -5493,7 +5493,7 @@ type KernelNode implements Node } """ -Added in 26.4.0. Per-slot resource allocation entry for a kernel. +Added in 26.3.0. Per-slot resource allocation entry for a kernel. Represents one row from the resource_allocations table. """ type KernelResourceAllocation implements Node @@ -5573,7 +5573,7 @@ type KernelV2 implements Node """Added in 26.3.0. The session this kernel belongs to.""" session: SessionV2 - """Added in 26.4.0. Per-slot resource allocation for this kernel.""" + """Added in 26.3.0. Per-slot resource allocation for this kernel.""" resourceAllocations: ResourceAllocationConnection! } @@ -7825,7 +7825,7 @@ enum NotificationRuleType } """ -Added in 26.4.0. Display number format configuration for a resource slot type. +Added in 26.3.0. Display number format configuration for a resource slot type. """ type NumberFormat @join__type(graph: STRAWBERRY) @@ -9429,11 +9429,11 @@ type Query adminSessionsV2(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! @join__field(graph: STRAWBERRY) """ - Added in 26.4.0. Returns a single resource slot type by slot_name, or null. + Added in 26.3.0. Returns a single resource slot type by slot_name, or null. """ resourceSlotType(slotName: String!): ResourceSlotType @join__field(graph: STRAWBERRY) - """Added in 26.4.0. Returns all registered resource slot types.""" + """Added in 26.3.0. Returns all registered resource slot types.""" resourceSlotTypes: ResourceSlotTypeConnection! @join__field(graph: STRAWBERRY) """ @@ -9937,7 +9937,7 @@ type ResourceAllocation } """ -Added in 26.4.0. Relay-style connection for per-slot kernel resource allocations. +Added in 26.3.0. Relay-style connection for per-slot kernel resource allocations. """ type ResourceAllocationConnection @join__type(graph: STRAWBERRY) @@ -10307,7 +10307,7 @@ input ResourceSlotInput } """ -Added in 26.4.0. A registered resource slot type describing display metadata +Added in 26.3.0. A registered resource slot type describing display metadata and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). """ type ResourceSlotType implements Node @@ -10347,7 +10347,7 @@ type ResourceSlotType implements Node } """ -Added in 26.4.0. Relay-style connection for paginated resource slot types. +Added in 26.3.0. Relay-style connection for paginated resource slot types. """ type ResourceSlotTypeConnection @join__type(graph: STRAWBERRY) diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index ca3ebf5e130..d4193227290 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -170,7 +170,7 @@ type AgentResource { free: JSON! } -"""Added in 26.4.0. Relay-style connection for per-slot agent resources.""" +"""Added in 26.3.0. Relay-style connection for per-slot agent resources.""" type AgentResourceConnection { """Pagination data for this connection""" pageInfo: PageInfo! @@ -181,7 +181,7 @@ type AgentResourceConnection { } """ -Added in 26.4.0. Per-slot resource capacity and usage entry for an agent. +Added in 26.3.0. Per-slot resource capacity and usage entry for an agent. Represents one row from the agent_resources table. """ type AgentResourceSlot implements Node { @@ -3059,7 +3059,7 @@ The `JSON` scalar type represents JSON values as specified by [ECMA-404](https:/ scalar JSON @specifiedBy(url: "https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf") """ -Added in 26.4.0. Per-slot resource allocation entry for a kernel. +Added in 26.3.0. Per-slot resource allocation entry for a kernel. Represents one row from the resource_allocations table. """ type KernelResourceAllocation implements Node { @@ -3131,7 +3131,7 @@ type KernelV2 implements Node { """Added in 26.3.0. The session this kernel belongs to.""" session: SessionV2 - """Added in 26.4.0. Per-slot resource allocation for this kernel.""" + """Added in 26.3.0. Per-slot resource allocation for this kernel.""" resourceAllocations: ResourceAllocationConnection! } @@ -4163,7 +4163,7 @@ enum NotificationRuleType { } """ -Added in 26.4.0. Display number format configuration for a resource slot type. +Added in 26.3.0. Display number format configuration for a resource slot type. """ type NumberFormat { """ @@ -5142,11 +5142,11 @@ type Query { adminSessionsV2(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! """ - Added in 26.4.0. Returns a single resource slot type by slot_name, or null. + Added in 26.3.0. Returns a single resource slot type by slot_name, or null. """ resourceSlotType(slotName: String!): ResourceSlotType - """Added in 26.4.0. Returns all registered resource slot types.""" + """Added in 26.3.0. Returns all registered resource slot types.""" resourceSlotTypes: ResourceSlotTypeConnection! """ @@ -5654,7 +5654,7 @@ type ResourceAllocation { } """ -Added in 26.4.0. Relay-style connection for per-slot kernel resource allocations. +Added in 26.3.0. Relay-style connection for per-slot kernel resource allocations. """ type ResourceAllocationConnection { """Pagination data for this connection""" @@ -5940,7 +5940,7 @@ input ResourceSlotInput { } """ -Added in 26.4.0. A registered resource slot type describing display metadata +Added in 26.3.0. A registered resource slot type describing display metadata and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). """ type ResourceSlotType implements Node { @@ -5977,7 +5977,7 @@ type ResourceSlotType implements Node { } """ -Added in 26.4.0. Relay-style connection for paginated resource slot types. +Added in 26.3.0. Relay-style connection for paginated resource slot types. """ type ResourceSlotTypeConnection { """Pagination data for this connection""" From 1e9310ad2b2d6218ae58b8317274815db847c70e Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 10:56:44 +0900 Subject: [PATCH 09/17] refactor(BA-4904): replace AllSlotTypesAction with search pattern for resource_slot_types - Remove AllSlotTypesAction from service, processors, and actions/__init__.py - Add ResourceSlotTypeFilterGQL, ResourceSlotTypeOrderFieldGQL, ResourceSlotTypeOrderByGQL to types.py - Add CursorConditions.by_cursor_forward/backward to query.py for cursor pagination - Update fetch_resource_slot_types fetcher to use build_querier() + SearchResourceSlotTypesAction with computed PageInfo (has_next_page/has_previous_page from actual results) - Update resource_slot_types resolver to accept pagination args (first/after/last/before/limit/offset) and filter/order_by, following session GQL pattern Co-Authored-By: Claude Sonnet 4.6 --- .../manager/api/gql/resource_slot/fetcher.py | 72 +++++++++++++---- .../manager/api/gql/resource_slot/resolver.py | 31 ++++++- .../manager/api/gql/resource_slot/types.py | 81 ++++++++++++++++++- .../repositories/resource_slot/query.py | 20 +++++ .../resource_slot/actions/__init__.py | 3 - .../services/resource_slot/processors.py | 5 -- .../manager/services/resource_slot/service.py | 6 -- 7 files changed, 183 insertions(+), 35 deletions(-) diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index 2c16775f28d..08817741d8a 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -8,10 +8,12 @@ from __future__ import annotations import uuid as _uuid +from functools import lru_cache import strawberry from strawberry import Info +from ai.backend.manager.api.gql.adapter import PaginationOptions, PaginationSpec from ai.backend.manager.api.gql.base import encode_cursor from ai.backend.manager.api.gql.types import StrawberryGQLContext from ai.backend.manager.data.resource_slot.types import ( @@ -19,7 +21,8 @@ ResourceAllocationData, ResourceSlotTypeData, ) -from ai.backend.manager.services.resource_slot.actions.all_slot_types import AllSlotTypesAction +from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow +from ai.backend.manager.repositories.resource_slot.query import CursorConditions, QueryOrders from ai.backend.manager.services.resource_slot.actions.get_agent_resources import ( GetAgentResourcesAction, ) @@ -29,6 +32,9 @@ from ai.backend.manager.services.resource_slot.actions.get_resource_slot_type import ( GetResourceSlotTypeAction, ) +from ai.backend.manager.services.resource_slot.actions.search_resource_slot_types import ( + SearchResourceSlotTypesAction, +) from .types import ( AgentResourceConnectionGQL, @@ -39,35 +45,67 @@ ResourceAllocationConnectionGQL, ResourceSlotTypeConnectionGQL, ResourceSlotTypeEdgeGQL, + ResourceSlotTypeFilterGQL, ResourceSlotTypeGQL, + ResourceSlotTypeOrderByGQL, ) +@lru_cache(maxsize=1) +def _get_slot_type_pagination_spec() -> PaginationSpec: + return PaginationSpec( + forward_order=QueryOrders.slot_name(ascending=True), + backward_order=QueryOrders.slot_name(ascending=False), + forward_condition_factory=CursorConditions.by_cursor_forward, + backward_condition_factory=CursorConditions.by_cursor_backward, + tiebreaker_order=ResourceSlotTypeRow.slot_name.asc(), + ) + + async def fetch_resource_slot_types( info: Info[StrawberryGQLContext], + filter: ResourceSlotTypeFilterGQL | None = None, + order_by: list[ResourceSlotTypeOrderByGQL] | None = None, + before: str | None = None, + after: str | None = None, + first: int | None = None, + last: int | None = None, + limit: int | None = None, + offset: int | None = None, ) -> ResourceSlotTypeConnectionGQL: - """Fetch all registered resource slot types (shared between root query and node resolver).""" - action_result = await info.context.processors.resource_slot.all_slot_types.wait_for_complete( - AllSlotTypesAction() + """Fetch resource slot types with pagination and filtering.""" + querier = info.context.gql_adapter.build_querier( + PaginationOptions( + first=first, + after=after, + last=last, + before=before, + limit=limit, + offset=offset, + ), + pagination_spec=_get_slot_type_pagination_spec(), + filter=filter, + order_by=order_by, ) - edges = [] - for data in action_result.items: - node = ResourceSlotTypeGQL.from_data(data) - cursor = encode_cursor(data.slot_name) - edges.append(ResourceSlotTypeEdgeGQL(node=node, cursor=cursor)) - - page_info = strawberry.relay.PageInfo( - has_next_page=False, - has_previous_page=False, - start_cursor=edges[0].cursor if edges else None, - end_cursor=edges[-1].cursor if edges else None, + action_result = ( + await info.context.processors.resource_slot.search_resource_slot_types.wait_for_complete( + SearchResourceSlotTypesAction(querier=querier) + ) ) + nodes = [ResourceSlotTypeGQL.from_data(data) for data in action_result.items] + edges = [ResourceSlotTypeEdgeGQL(node=node, cursor=encode_cursor(node.id)) for node in nodes] + return ResourceSlotTypeConnectionGQL( - count=len(edges), edges=edges, - page_info=page_info, + page_info=strawberry.relay.PageInfo( + has_next_page=action_result.has_next_page, + has_previous_page=action_result.has_previous_page, + start_cursor=edges[0].cursor if edges else None, + end_cursor=edges[-1].cursor if edges else None, + ), + count=action_result.total_count, ) diff --git a/src/ai/backend/manager/api/gql/resource_slot/resolver.py b/src/ai/backend/manager/api/gql/resource_slot/resolver.py index 5b7250c22e2..c764ae71a7a 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/resolver.py +++ b/src/ai/backend/manager/api/gql/resource_slot/resolver.py @@ -8,7 +8,12 @@ from ai.backend.manager.api.gql.types import StrawberryGQLContext from .fetcher import fetch_resource_slot_type, fetch_resource_slot_types -from .types import ResourceSlotTypeConnectionGQL, ResourceSlotTypeGQL +from .types import ( + ResourceSlotTypeConnectionGQL, + ResourceSlotTypeFilterGQL, + ResourceSlotTypeGQL, + ResourceSlotTypeOrderByGQL, +) @strawberry.field( @@ -21,8 +26,28 @@ async def resource_slot_type( return await fetch_resource_slot_type(info, slot_name) -@strawberry.field(description="Added in 26.3.0. Returns all registered resource slot types.") # type: ignore[misc] +@strawberry.field( + description="Added in 26.3.0. Returns resource slot types with pagination and filtering." +) # type: ignore[misc] async def resource_slot_types( info: Info[StrawberryGQLContext], + filter: ResourceSlotTypeFilterGQL | None = None, + order_by: list[ResourceSlotTypeOrderByGQL] | None = None, + before: str | None = None, + after: str | None = None, + first: int | None = None, + last: int | None = None, + limit: int | None = None, + offset: int | None = None, ) -> ResourceSlotTypeConnectionGQL: - return await fetch_resource_slot_types(info) + return await fetch_resource_slot_types( + info, + filter=filter, + order_by=order_by, + before=before, + after=after, + first=first, + last=last, + limit=limit, + offset=offset, + ) diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py index 7cd17fdf864..72a4f93d61b 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/types.py +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -10,13 +10,15 @@ from collections.abc import Iterable from decimal import Decimal +from enum import StrEnum from typing import Any, Self import strawberry from strawberry import ID, Info from strawberry.relay import Connection, Edge, Node, NodeID -from ai.backend.manager.api.gql.types import StrawberryGQLContext +from ai.backend.manager.api.gql.base import OrderDirection, StringFilter +from ai.backend.manager.api.gql.types import GQLFilter, GQLOrderBy, StrawberryGQLContext from ai.backend.manager.api.gql.utils import dedent_strip from ai.backend.manager.data.resource_slot.types import ( AgentResourceData, @@ -24,6 +26,8 @@ ResourceAllocationData, ResourceSlotTypeData, ) +from ai.backend.manager.repositories.base import QueryCondition, QueryOrder +from ai.backend.manager.repositories.resource_slot.query import QueryConditions, QueryOrders # ========== NumberFormat ========== @@ -127,6 +131,81 @@ def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: self.count = count +# ========== ResourceSlotType Filter/OrderBy ========== + + +@strawberry.enum( + name="ResourceSlotTypeOrderField", + description="Added in 26.3.0. Fields available for ordering resource slot types.", +) +class ResourceSlotTypeOrderFieldGQL(StrEnum): + SLOT_NAME = "slot_name" + RANK = "rank" + DISPLAY_NAME = "display_name" + + +@strawberry.input( + name="ResourceSlotTypeFilter", + description="Added in 26.3.0. Filter criteria for querying resource slot types.", +) +class ResourceSlotTypeFilterGQL(GQLFilter): + slot_name: StringFilter | None = None + slot_type: StringFilter | None = None + display_name: StringFilter | None = None + + def build_conditions(self) -> list[QueryCondition]: + conditions: list[QueryCondition] = [] + if self.slot_name: + condition = self.slot_name.build_query_condition( + contains_factory=QueryConditions.by_slot_name_contains, + equals_factory=QueryConditions.by_slot_name_equals, + starts_with_factory=QueryConditions.by_slot_name_starts_with, + ends_with_factory=QueryConditions.by_slot_name_ends_with, + ) + if condition: + conditions.append(condition) + if self.slot_type: + condition = self.slot_type.build_query_condition( + contains_factory=QueryConditions.by_slot_type_contains, + equals_factory=QueryConditions.by_slot_type_equals, + starts_with_factory=QueryConditions.by_slot_type_starts_with, + ends_with_factory=QueryConditions.by_slot_type_ends_with, + ) + if condition: + conditions.append(condition) + if self.display_name: + condition = self.display_name.build_query_condition( + contains_factory=QueryConditions.by_display_name_contains, + equals_factory=QueryConditions.by_display_name_equals, + starts_with_factory=QueryConditions.by_display_name_starts_with, + ends_with_factory=QueryConditions.by_display_name_ends_with, + ) + if condition: + conditions.append(condition) + return conditions + + +@strawberry.input( + name="ResourceSlotTypeOrderBy", + description="Added in 26.3.0. Ordering specification for resource slot types.", +) +class ResourceSlotTypeOrderByGQL(GQLOrderBy): + field: ResourceSlotTypeOrderFieldGQL + direction: OrderDirection = OrderDirection.ASC + + def to_query_order(self) -> QueryOrder: + ascending = self.direction == OrderDirection.ASC + match self.field: + case ResourceSlotTypeOrderFieldGQL.SLOT_NAME: + return QueryOrders.slot_name(ascending) + case ResourceSlotTypeOrderFieldGQL.RANK: + return QueryOrders.rank(ascending) + case ResourceSlotTypeOrderFieldGQL.DISPLAY_NAME: + return QueryOrders.display_name(ascending) + case _: + raise ValueError(f"Unhandled ResourceSlotTypeOrderFieldGQL value: {self.field!r}") + + # ========== AgentResourceSlotGQL (Node) ========== diff --git a/src/ai/backend/manager/repositories/resource_slot/query.py b/src/ai/backend/manager/repositories/resource_slot/query.py index d584d685992..d68cff1b5cf 100644 --- a/src/ai/backend/manager/repositories/resource_slot/query.py +++ b/src/ai/backend/manager/repositories/resource_slot/query.py @@ -183,3 +183,23 @@ def display_name(ascending: bool = True) -> QueryOrder: if ascending: return ResourceSlotTypeRow.display_name.asc() return ResourceSlotTypeRow.display_name.desc() + + +class CursorConditions: + @staticmethod + def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: + """Cursor condition for forward pagination (after cursor). slot_name is the primary key.""" + + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceSlotTypeRow.slot_name > cursor_slot_name + + return inner + + @staticmethod + def by_cursor_backward(cursor_slot_name: str) -> QueryCondition: + """Cursor condition for backward pagination (before cursor). slot_name is the primary key.""" + + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceSlotTypeRow.slot_name < cursor_slot_name + + return inner diff --git a/src/ai/backend/manager/services/resource_slot/actions/__init__.py b/src/ai/backend/manager/services/resource_slot/actions/__init__.py index b4ea4cf0eb6..656154a698d 100644 --- a/src/ai/backend/manager/services/resource_slot/actions/__init__.py +++ b/src/ai/backend/manager/services/resource_slot/actions/__init__.py @@ -1,4 +1,3 @@ -from .all_slot_types import AllSlotTypesAction, AllSlotTypesResult from .get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .get_domain_resource_overview import ( GetDomainResourceOverviewAction, @@ -18,8 +17,6 @@ from .search_resource_slot_types import SearchResourceSlotTypesAction, SearchResourceSlotTypesResult __all__ = ( - "AllSlotTypesAction", - "AllSlotTypesResult", "GetAgentResourcesAction", "GetAgentResourcesResult", "GetDomainResourceOverviewAction", diff --git a/src/ai/backend/manager/services/resource_slot/processors.py b/src/ai/backend/manager/services/resource_slot/processors.py index d9334f0ba41..db836645204 100644 --- a/src/ai/backend/manager/services/resource_slot/processors.py +++ b/src/ai/backend/manager/services/resource_slot/processors.py @@ -7,8 +7,6 @@ from ai.backend.manager.actions.types import AbstractProcessorPackage, ActionSpec from .actions import ( - AllSlotTypesAction, - AllSlotTypesResult, GetAgentResourcesAction, GetAgentResourcesResult, GetDomainResourceOverviewAction, @@ -30,7 +28,6 @@ class ResourceSlotProcessors(AbstractProcessorPackage): - all_slot_types: ActionProcessor[AllSlotTypesAction, AllSlotTypesResult] get_agent_resources: ActionProcessor[GetAgentResourcesAction, GetAgentResourcesResult] search_agent_resources: ActionProcessor[SearchAgentResourcesAction, SearchAgentResourcesResult] get_kernel_allocations: ActionProcessor[GetKernelAllocationsAction, GetKernelAllocationsResult] @@ -49,7 +46,6 @@ class ResourceSlotProcessors(AbstractProcessorPackage): ] def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMonitor]) -> None: - self.all_slot_types = ActionProcessor(service.all_slot_types, action_monitors) self.get_agent_resources = ActionProcessor(service.get_agent_resources, action_monitors) self.search_agent_resources = ActionProcessor( service.search_agent_resources, action_monitors @@ -76,7 +72,6 @@ def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMon @override def supported_actions(self) -> list[ActionSpec]: return [ - AllSlotTypesAction.spec(), GetAgentResourcesAction.spec(), SearchAgentResourcesAction.spec(), GetKernelAllocationsAction.spec(), diff --git a/src/ai/backend/manager/services/resource_slot/service.py b/src/ai/backend/manager/services/resource_slot/service.py index f2fdac8eb7d..0b2691588d2 100644 --- a/src/ai/backend/manager/services/resource_slot/service.py +++ b/src/ai/backend/manager/services/resource_slot/service.py @@ -10,7 +10,6 @@ from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow from ai.backend.manager.repositories.resource_slot.repository import ResourceSlotRepository -from .actions.all_slot_types import AllSlotTypesAction, AllSlotTypesResult from .actions.get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .actions.get_domain_resource_overview import ( GetDomainResourceOverviewAction, @@ -55,11 +54,6 @@ class ResourceSlotService: def __init__(self, repository: ResourceSlotRepository) -> None: self._repository = repository - async def all_slot_types(self, action: AllSlotTypesAction) -> AllSlotTypesResult: - rows = await self._repository.all_slot_types() - items = [_row_to_slot_type_data(row) for row in rows] - return AllSlotTypesResult(items=items) - async def get_agent_resources(self, action: GetAgentResourcesAction) -> GetAgentResourcesResult: rows = await self._repository.get_agent_resources(action.agent_id) items = [ From bef273cbe3e28b66a97ac8be556154d55b69b0a9 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 02:00:09 +0000 Subject: [PATCH 10/17] chore: update api schema dump Co-authored-by: octodog --- .../graphql-reference/supergraph.graphql | 32 +++++++++++++++++-- .../graphql-reference/v2-schema.graphql | 26 +++++++++++++-- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 308bd0208fc..595880e1f39 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -9433,8 +9433,10 @@ type Query """ resourceSlotType(slotName: String!): ResourceSlotType @join__field(graph: STRAWBERRY) - """Added in 26.3.0. Returns all registered resource slot types.""" - resourceSlotTypes: ResourceSlotTypeConnection! @join__field(graph: STRAWBERRY) + """ + Added in 26.3.0. Returns resource slot types with pagination and filtering. + """ + resourceSlotTypes(filter: ResourceSlotTypeFilter = null, orderBy: [ResourceSlotTypeOrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): ResourceSlotTypeConnection! @join__field(graph: STRAWBERRY) """ Added in 26.2.0. @@ -10371,6 +10373,32 @@ type ResourceSlotTypeEdge node: ResourceSlotType! } +"""Added in 26.3.0. Filter criteria for querying resource slot types.""" +input ResourceSlotTypeFilter + @join__type(graph: STRAWBERRY) +{ + slotName: StringFilter = null + slotType: StringFilter = null + displayName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for resource slot types.""" +input ResourceSlotTypeOrderBy + @join__type(graph: STRAWBERRY) +{ + field: ResourceSlotTypeOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering resource slot types.""" +enum ResourceSlotTypeOrderField + @join__type(graph: STRAWBERRY) +{ + SLOT_NAME @join__enumValue(graph: STRAWBERRY) + RANK @join__enumValue(graph: STRAWBERRY) + DISPLAY_NAME @join__enumValue(graph: STRAWBERRY) +} + """ Added in 26.2.0. Resource weight with default indicator. Shows whether this resource type's weight was explicitly set or uses default. """ diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index d4193227290..da41a44c607 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -5146,8 +5146,10 @@ type Query { """ resourceSlotType(slotName: String!): ResourceSlotType - """Added in 26.3.0. Returns all registered resource slot types.""" - resourceSlotTypes: ResourceSlotTypeConnection! + """ + Added in 26.3.0. Returns resource slot types with pagination and filtering. + """ + resourceSlotTypes(filter: ResourceSlotTypeFilter = null, orderBy: [ResourceSlotTypeOrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): ResourceSlotTypeConnection! """ Added in 26.2.0. @@ -5997,6 +5999,26 @@ type ResourceSlotTypeEdge { node: ResourceSlotType! } +"""Added in 26.3.0. Filter criteria for querying resource slot types.""" +input ResourceSlotTypeFilter { + slotName: StringFilter = null + slotType: StringFilter = null + displayName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for resource slot types.""" +input ResourceSlotTypeOrderBy { + field: ResourceSlotTypeOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering resource slot types.""" +enum ResourceSlotTypeOrderField { + SLOT_NAME + RANK + DISPLAY_NAME +} + """ Added in 26.2.0. Resource weight with default indicator. Shows whether this resource type's weight was explicitly set or uses default. """ From 24a5c7cfc41eddd92de9a4d44c675cc267f3a50e Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 11:02:13 +0900 Subject: [PATCH 11/17] feat(BA-4904): use search actions with pagination for agent/kernel resource connections - Add AgentResourceQueryConditions/Orders and ResourceAllocationQueryConditions/Orders to query.py for cursor-based pagination on slot_name - Update fetch_agent_resources and fetch_kernel_allocations to accept pagination args (first/after/last/before/limit/offset) and use SearchAgentResourcesAction / SearchResourceAllocationsAction via build_querier() with base_conditions scope filter - Compute has_next_page/has_previous_page from actual search results instead of hardcoded False - Cursor now encodes slot_name only (within fixed agent/kernel scope) Co-Authored-By: Claude Sonnet 4.6 --- .../manager/api/gql/resource_slot/fetcher.py | 129 ++++++++++++++---- .../repositories/resource_slot/query.py | 70 +++++++++- 2 files changed, 170 insertions(+), 29 deletions(-) diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index 08817741d8a..c8066b5237a 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -21,8 +21,19 @@ ResourceAllocationData, ResourceSlotTypeData, ) -from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow -from ai.backend.manager.repositories.resource_slot.query import CursorConditions, QueryOrders +from ai.backend.manager.models.resource_slot import ( + AgentResourceRow, + ResourceAllocationRow, + ResourceSlotTypeRow, +) +from ai.backend.manager.repositories.resource_slot.query import ( + AgentResourceQueryConditions, + AgentResourceQueryOrders, + CursorConditions, + QueryOrders, + ResourceAllocationQueryConditions, + ResourceAllocationQueryOrders, +) from ai.backend.manager.services.resource_slot.actions.get_agent_resources import ( GetAgentResourcesAction, ) @@ -32,6 +43,12 @@ from ai.backend.manager.services.resource_slot.actions.get_resource_slot_type import ( GetResourceSlotTypeAction, ) +from ai.backend.manager.services.resource_slot.actions.search_agent_resources import ( + SearchAgentResourcesAction, +) +from ai.backend.manager.services.resource_slot.actions.search_resource_allocations import ( + SearchResourceAllocationsAction, +) from ai.backend.manager.services.resource_slot.actions.search_resource_slot_types import ( SearchResourceSlotTypesAction, ) @@ -62,6 +79,28 @@ def _get_slot_type_pagination_spec() -> PaginationSpec: ) +@lru_cache(maxsize=1) +def _get_agent_resource_pagination_spec() -> PaginationSpec: + return PaginationSpec( + forward_order=AgentResourceQueryOrders.slot_name(ascending=True), + backward_order=AgentResourceQueryOrders.slot_name(ascending=False), + forward_condition_factory=AgentResourceQueryConditions.by_cursor_forward, + backward_condition_factory=AgentResourceQueryConditions.by_cursor_backward, + tiebreaker_order=AgentResourceRow.slot_name.asc(), + ) + + +@lru_cache(maxsize=1) +def _get_resource_allocation_pagination_spec() -> PaginationSpec: + return PaginationSpec( + forward_order=ResourceAllocationQueryOrders.slot_name(ascending=True), + backward_order=ResourceAllocationQueryOrders.slot_name(ascending=False), + forward_condition_factory=ResourceAllocationQueryConditions.by_cursor_forward, + backward_condition_factory=ResourceAllocationQueryConditions.by_cursor_backward, + tiebreaker_order=ResourceAllocationRow.slot_name.asc(), + ) + + async def fetch_resource_slot_types( info: Info[StrawberryGQLContext], filter: ResourceSlotTypeFilterGQL | None = None, @@ -125,31 +164,48 @@ async def fetch_resource_slot_type( async def fetch_agent_resources( info: Info[StrawberryGQLContext], agent_id: str, + before: str | None = None, + after: str | None = None, + first: int | None = None, + last: int | None = None, + limit: int | None = None, + offset: int | None = None, ) -> AgentResourceConnectionGQL: - """Fetch all per-slot resource entries for a given agent (shared for AgentV2GQL connection).""" + """Fetch per-slot resource entries for a given agent with pagination.""" + querier = info.context.gql_adapter.build_querier( + PaginationOptions( + first=first, + after=after, + last=last, + before=before, + limit=limit, + offset=offset, + ), + pagination_spec=_get_agent_resource_pagination_spec(), + base_conditions=[AgentResourceQueryConditions.by_agent_id(agent_id)], + ) + action_result = ( - await info.context.processors.resource_slot.get_agent_resources.wait_for_complete( - GetAgentResourcesAction(agent_id=agent_id) + await info.context.processors.resource_slot.search_agent_resources.wait_for_complete( + SearchAgentResourcesAction(querier=querier) ) ) edges = [] for data in action_result.items: node = AgentResourceSlotGQL.from_data(data) - cursor = encode_cursor(f"{data.agent_id}:{data.slot_name}") + cursor = encode_cursor(data.slot_name) edges.append(AgentResourceSlotEdgeGQL(node=node, cursor=cursor)) - page_info = strawberry.relay.PageInfo( - has_next_page=False, - has_previous_page=False, - start_cursor=edges[0].cursor if edges else None, - end_cursor=edges[-1].cursor if edges else None, - ) - return AgentResourceConnectionGQL( - count=len(edges), + count=action_result.total_count, edges=edges, - page_info=page_info, + page_info=strawberry.relay.PageInfo( + has_next_page=action_result.has_next_page, + has_previous_page=action_result.has_previous_page, + start_cursor=edges[0].cursor if edges else None, + end_cursor=edges[-1].cursor if edges else None, + ), ) @@ -173,31 +229,48 @@ async def fetch_agent_resource_slot( async def fetch_kernel_allocations( info: Info[StrawberryGQLContext], kernel_id: str, + before: str | None = None, + after: str | None = None, + first: int | None = None, + last: int | None = None, + limit: int | None = None, + offset: int | None = None, ) -> ResourceAllocationConnectionGQL: - """Fetch all per-slot allocation entries for a kernel (shared for KernelV2GQL connection).""" + """Fetch per-slot allocation entries for a kernel with pagination.""" + querier = info.context.gql_adapter.build_querier( + PaginationOptions( + first=first, + after=after, + last=last, + before=before, + limit=limit, + offset=offset, + ), + pagination_spec=_get_resource_allocation_pagination_spec(), + base_conditions=[ResourceAllocationQueryConditions.by_kernel_id(_uuid.UUID(kernel_id))], + ) + action_result = ( - await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( - GetKernelAllocationsAction(kernel_id=_uuid.UUID(kernel_id)) + await info.context.processors.resource_slot.search_resource_allocations.wait_for_complete( + SearchResourceAllocationsAction(querier=querier) ) ) edges = [] for data in action_result.items: node = KernelResourceAllocationGQL.from_data(data) - cursor = encode_cursor(f"{data.kernel_id}:{data.slot_name}") + cursor = encode_cursor(data.slot_name) edges.append(KernelResourceAllocationEdgeGQL(node=node, cursor=cursor)) - page_info = strawberry.relay.PageInfo( - has_next_page=False, - has_previous_page=False, - start_cursor=edges[0].cursor if edges else None, - end_cursor=edges[-1].cursor if edges else None, - ) - return ResourceAllocationConnectionGQL( - count=len(edges), + count=action_result.total_count, edges=edges, - page_info=page_info, + page_info=strawberry.relay.PageInfo( + has_next_page=action_result.has_next_page, + has_previous_page=action_result.has_previous_page, + start_cursor=edges[0].cursor if edges else None, + end_cursor=edges[-1].cursor if edges else None, + ), ) diff --git a/src/ai/backend/manager/repositories/resource_slot/query.py b/src/ai/backend/manager/repositories/resource_slot/query.py index d68cff1b5cf..6a88a41a38f 100644 --- a/src/ai/backend/manager/repositories/resource_slot/query.py +++ b/src/ai/backend/manager/repositories/resource_slot/query.py @@ -1,9 +1,15 @@ from __future__ import annotations +import uuid + import sqlalchemy as sa from ai.backend.common.data.filter_specs import StringMatchSpec -from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow +from ai.backend.manager.models.resource_slot import ( + AgentResourceRow, + ResourceAllocationRow, + ResourceSlotTypeRow, +) from ai.backend.manager.repositories.base import QueryCondition, QueryOrder @@ -203,3 +209,65 @@ def inner() -> sa.sql.expression.ColumnElement[bool]: return ResourceSlotTypeRow.slot_name < cursor_slot_name return inner + + +class AgentResourceQueryConditions: + @staticmethod + def by_agent_id(agent_id: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return AgentResourceRow.agent_id == agent_id + + return inner + + @staticmethod + def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return AgentResourceRow.slot_name > cursor_slot_name + + return inner + + @staticmethod + def by_cursor_backward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return AgentResourceRow.slot_name < cursor_slot_name + + return inner + + +class AgentResourceQueryOrders: + @staticmethod + def slot_name(ascending: bool = True) -> QueryOrder: + if ascending: + return AgentResourceRow.slot_name.asc() + return AgentResourceRow.slot_name.desc() + + +class ResourceAllocationQueryConditions: + @staticmethod + def by_kernel_id(kernel_id: uuid.UUID) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceAllocationRow.kernel_id == kernel_id + + return inner + + @staticmethod + def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceAllocationRow.slot_name > cursor_slot_name + + return inner + + @staticmethod + def by_cursor_backward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceAllocationRow.slot_name < cursor_slot_name + + return inner + + +class ResourceAllocationQueryOrders: + @staticmethod + def slot_name(ascending: bool = True) -> QueryOrder: + if ascending: + return ResourceAllocationRow.slot_name.asc() + return ResourceAllocationRow.slot_name.desc() From 3666865a70dad008cd8d14e32c76f658edd00bd8 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 11:08:58 +0900 Subject: [PATCH 12/17] feat(BA-4904): add slot-specific actions to avoid full-list fetch + linear scan - Add AgentResourceNotFound and ResourceAllocationNotFound error types - Add get_agent_resource_by_slot and get_kernel_allocation_by_slot DB source methods - Add corresponding repository delegation methods - Add GetAgentResourceBySlotAction and GetKernelAllocationBySlotAction - Register new processors in ResourceSlotProcessors - Update load_agent_resource_data, load_kernel_allocation_data, fetch_agent_resource_slot, fetch_kernel_resource_allocation to use slot-specific actions instead of full-list fetch + linear scan Co-Authored-By: Claude Sonnet 4.6 --- .../manager/api/gql/resource_slot/fetcher.py | 76 ++++++++++--------- .../backend/manager/errors/resource_slot.py | 28 +++++++ .../resource_slot/db_source/db_source.py | 42 ++++++++++ .../repositories/resource_slot/repository.py | 12 +++ .../resource_slot/actions/__init__.py | 12 +++ .../actions/get_agent_resource_by_slot.py | 40 ++++++++++ .../actions/get_kernel_allocation_by_slot.py | 41 ++++++++++ .../services/resource_slot/processors.py | 18 +++++ .../manager/services/resource_slot/service.py | 36 +++++++++ 9 files changed, 269 insertions(+), 36 deletions(-) create mode 100644 src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py create mode 100644 src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index c8066b5237a..9301a45c052 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -34,11 +34,11 @@ ResourceAllocationQueryConditions, ResourceAllocationQueryOrders, ) -from ai.backend.manager.services.resource_slot.actions.get_agent_resources import ( - GetAgentResourcesAction, +from ai.backend.manager.services.resource_slot.actions.get_agent_resource_by_slot import ( + GetAgentResourceBySlotAction, ) -from ai.backend.manager.services.resource_slot.actions.get_kernel_allocations import ( - GetKernelAllocationsAction, +from ai.backend.manager.services.resource_slot.actions.get_kernel_allocation_by_slot import ( + GetKernelAllocationBySlotAction, ) from ai.backend.manager.services.resource_slot.actions.get_resource_slot_type import ( GetResourceSlotTypeAction, @@ -215,15 +215,15 @@ async def fetch_agent_resource_slot( slot_name: str, ) -> AgentResourceSlotGQL | None: """Fetch a single per-slot resource entry for an agent (used by Node resolution).""" - action_result = ( - await info.context.processors.resource_slot.get_agent_resources.wait_for_complete( - GetAgentResourcesAction(agent_id=agent_id) + from ai.backend.manager.errors.resource_slot import AgentResourceNotFound + + try: + action_result = await info.context.processors.resource_slot.get_agent_resource_by_slot.wait_for_complete( + GetAgentResourceBySlotAction(agent_id=agent_id, slot_name=slot_name) ) - ) - for data in action_result.items: - if data.slot_name == slot_name: - return AgentResourceSlotGQL.from_data(data) - return None + except AgentResourceNotFound: + return None + return AgentResourceSlotGQL.from_data(action_result.item) async def fetch_kernel_allocations( @@ -280,15 +280,17 @@ async def fetch_kernel_resource_allocation( slot_name: str, ) -> KernelResourceAllocationGQL | None: """Fetch a single per-slot allocation for a kernel (used by Node resolution).""" - action_result = ( - await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( - GetKernelAllocationsAction(kernel_id=_uuid.UUID(kernel_id_str)) + from ai.backend.manager.errors.resource_slot import ResourceAllocationNotFound + + try: + action_result = await info.context.processors.resource_slot.get_kernel_allocation_by_slot.wait_for_complete( + GetKernelAllocationBySlotAction( + kernel_id=_uuid.UUID(kernel_id_str), slot_name=slot_name + ) ) - ) - for data in action_result.items: - if data.slot_name == slot_name: - return KernelResourceAllocationGQL.from_data(data) - return None + except ResourceAllocationNotFound: + return None + return KernelResourceAllocationGQL.from_data(action_result.item) # ========== Raw data helpers for Node.resolve_nodes ========== @@ -324,15 +326,15 @@ async def load_agent_resource_data( slot_name: str, ) -> AgentResourceData | None: """Load raw AgentResourceData for a single agent+slot (used by Node.resolve_nodes).""" - action_result = ( - await info.context.processors.resource_slot.get_agent_resources.wait_for_complete( - GetAgentResourcesAction(agent_id=agent_id) + from ai.backend.manager.errors.resource_slot import AgentResourceNotFound + + try: + action_result = await info.context.processors.resource_slot.get_agent_resource_by_slot.wait_for_complete( + GetAgentResourceBySlotAction(agent_id=agent_id, slot_name=slot_name) ) - ) - for data in action_result.items: - if data.slot_name == slot_name: - return data - return None + except AgentResourceNotFound: + return None + return action_result.item async def load_kernel_allocation_data( @@ -341,12 +343,14 @@ async def load_kernel_allocation_data( slot_name: str, ) -> ResourceAllocationData | None: """Load raw ResourceAllocationData for a single kernel+slot (used by Node.resolve_nodes).""" - action_result = ( - await info.context.processors.resource_slot.get_kernel_allocations.wait_for_complete( - GetKernelAllocationsAction(kernel_id=_uuid.UUID(kernel_id_str)) + from ai.backend.manager.errors.resource_slot import ResourceAllocationNotFound + + try: + action_result = await info.context.processors.resource_slot.get_kernel_allocation_by_slot.wait_for_complete( + GetKernelAllocationBySlotAction( + kernel_id=_uuid.UUID(kernel_id_str), slot_name=slot_name + ) ) - ) - for data in action_result.items: - if data.slot_name == slot_name: - return data - return None + except ResourceAllocationNotFound: + return None + return action_result.item diff --git a/src/ai/backend/manager/errors/resource_slot.py b/src/ai/backend/manager/errors/resource_slot.py index 36babe58b9d..3bd35f2ede1 100644 --- a/src/ai/backend/manager/errors/resource_slot.py +++ b/src/ai/backend/manager/errors/resource_slot.py @@ -27,6 +27,34 @@ def error_code(self) -> ErrorCode: ) +class AgentResourceNotFound(BackendAIError): + """Raised when an agent resource entry for a given agent+slot is not found.""" + + error_type = "https://api.backend.ai/probs/agent-resource-not-found" + error_title = "Agent resource not found." + + def error_code(self) -> ErrorCode: + return ErrorCode( + domain=ErrorDomain.AGENT, + operation=ErrorOperation.READ, + error_detail=ErrorDetail.NOT_FOUND, + ) + + +class ResourceAllocationNotFound(BackendAIError): + """Raised when a resource allocation entry for a given kernel+slot is not found.""" + + error_type = "https://api.backend.ai/probs/resource-allocation-not-found" + error_title = "Resource allocation not found." + + def error_code(self) -> ErrorCode: + return ErrorCode( + domain=ErrorDomain.KERNEL, + operation=ErrorOperation.READ, + error_detail=ErrorDetail.NOT_FOUND, + ) + + class AgentResourceCapacityExceeded(BackendAIError): """Raised when an agent resource update would exceed the slot capacity.""" diff --git a/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py b/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py index b8a38863be1..065b6569434 100644 --- a/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py @@ -20,6 +20,8 @@ ResourceSlotTypeSearchResult, ) from ai.backend.manager.errors.resource_slot import ( + AgentResourceNotFound, + ResourceAllocationNotFound, ResourceSlotTypeNotFound, ) from ai.backend.manager.models.kernel import KernelRow @@ -135,6 +137,25 @@ async def get_agent_resources(self, agent_id: str) -> list[AgentResourceRow]: result = await db_sess.execute(stmt) return list(result.scalars().all()) + async def get_agent_resource_by_slot(self, agent_id: str, slot_name: str) -> AgentResourceRow: + """Get a single slot capacity/usage row for a specific agent+slot combination. + + Raises: + AgentResourceNotFound: If no entry exists for the given agent and slot. + """ + async with self._db.begin_readonly_session_read_committed() as db_sess: + stmt = sa.select(AgentResourceRow).where( + AgentResourceRow.agent_id == agent_id, + AgentResourceRow.slot_name == slot_name, + ) + result = await db_sess.execute(stmt) + row = result.scalar_one_or_none() + if row is None: + raise AgentResourceNotFound( + f"Agent resource not found for agent='{agent_id}', slot='{slot_name}'." + ) + return row + async def search_agent_resources(self, querier: BatchQuerier) -> AgentResourceSearchResult: # Paginated search across all agent_resources rows. # Caller injects conditions (e.g. by_slot_name, by_agent_id) via querier. @@ -170,6 +191,27 @@ async def get_kernel_allocations(self, kernel_id: uuid.UUID) -> list[ResourceAll result = await db_sess.execute(stmt) return list(result.scalars().all()) + async def get_kernel_allocation_by_slot( + self, kernel_id: uuid.UUID, slot_name: str + ) -> ResourceAllocationRow: + """Get a single allocation row for a specific kernel+slot combination. + + Raises: + ResourceAllocationNotFound: If no entry exists for the given kernel and slot. + """ + async with self._db.begin_readonly_session_read_committed() as db_sess: + stmt = sa.select(ResourceAllocationRow).where( + ResourceAllocationRow.kernel_id == kernel_id, + ResourceAllocationRow.slot_name == slot_name, + ) + result = await db_sess.execute(stmt) + row = result.scalar_one_or_none() + if row is None: + raise ResourceAllocationNotFound( + f"Resource allocation not found for kernel='{kernel_id}', slot='{slot_name}'." + ) + return row + async def search_resource_allocations( self, querier: BatchQuerier ) -> ResourceAllocationSearchResult: diff --git a/src/ai/backend/manager/repositories/resource_slot/repository.py b/src/ai/backend/manager/repositories/resource_slot/repository.py index 08499ae8420..c9b445e56a4 100644 --- a/src/ai/backend/manager/repositories/resource_slot/repository.py +++ b/src/ai/backend/manager/repositories/resource_slot/repository.py @@ -89,6 +89,11 @@ async def get_agent_resources(self, agent_id: str) -> list[AgentResourceRow]: """Get all slot capacity/usage rows for a given agent.""" return await self._db_source.get_agent_resources(agent_id) + @resource_slot_repository_resilience.apply() + async def get_agent_resource_by_slot(self, agent_id: str, slot_name: str) -> AgentResourceRow: + """Get a single slot row for one agent+slot combination.""" + return await self._db_source.get_agent_resource_by_slot(agent_id, slot_name) + @resource_slot_repository_resilience.apply() async def search_agent_resources(self, querier: BatchQuerier) -> AgentResourceSearchResult: return await self._db_source.search_agent_resources(querier) @@ -100,6 +105,13 @@ async def get_kernel_allocations(self, kernel_id: uuid.UUID) -> list[ResourceAll """Get all per-slot allocation rows for a given kernel.""" return await self._db_source.get_kernel_allocations(kernel_id) + @resource_slot_repository_resilience.apply() + async def get_kernel_allocation_by_slot( + self, kernel_id: uuid.UUID, slot_name: str + ) -> ResourceAllocationRow: + """Get a single allocation row for one kernel+slot combination.""" + return await self._db_source.get_kernel_allocation_by_slot(kernel_id, slot_name) + @resource_slot_repository_resilience.apply() async def search_resource_allocations( self, querier: BatchQuerier diff --git a/src/ai/backend/manager/services/resource_slot/actions/__init__.py b/src/ai/backend/manager/services/resource_slot/actions/__init__.py index 656154a698d..6124c62e96b 100644 --- a/src/ai/backend/manager/services/resource_slot/actions/__init__.py +++ b/src/ai/backend/manager/services/resource_slot/actions/__init__.py @@ -1,8 +1,16 @@ +from .get_agent_resource_by_slot import ( + GetAgentResourceBySlotAction, + GetAgentResourceBySlotResult, +) from .get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .get_domain_resource_overview import ( GetDomainResourceOverviewAction, GetDomainResourceOverviewResult, ) +from .get_kernel_allocation_by_slot import ( + GetKernelAllocationBySlotAction, + GetKernelAllocationBySlotResult, +) from .get_kernel_allocations import GetKernelAllocationsAction, GetKernelAllocationsResult from .get_project_resource_overview import ( GetProjectResourceOverviewAction, @@ -17,10 +25,14 @@ from .search_resource_slot_types import SearchResourceSlotTypesAction, SearchResourceSlotTypesResult __all__ = ( + "GetAgentResourceBySlotAction", + "GetAgentResourceBySlotResult", "GetAgentResourcesAction", "GetAgentResourcesResult", "GetDomainResourceOverviewAction", "GetDomainResourceOverviewResult", + "GetKernelAllocationBySlotAction", + "GetKernelAllocationBySlotResult", "GetKernelAllocationsAction", "GetKernelAllocationsResult", "GetProjectResourceOverviewAction", diff --git a/src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py b/src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py new file mode 100644 index 00000000000..b68626e5366 --- /dev/null +++ b/src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import EntityType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.resource_slot.types import AgentResourceData + +from .base import ResourceSlotAction + + +@dataclass +class GetAgentResourceBySlotAction(ResourceSlotAction): + agent_id: str + slot_name: str + + @override + @classmethod + def entity_type(cls) -> EntityType: + return EntityType.AGENT_RESOURCE + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.GET + + @override + def entity_id(self) -> str | None: + return f"{self.agent_id}:{self.slot_name}" + + +@dataclass +class GetAgentResourceBySlotResult(BaseActionResult): + item: AgentResourceData + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py b/src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py new file mode 100644 index 00000000000..4c2bf4c5ad6 --- /dev/null +++ b/src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import uuid +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import EntityType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.resource_slot.types import ResourceAllocationData + +from .base import ResourceSlotAction + + +@dataclass +class GetKernelAllocationBySlotAction(ResourceSlotAction): + kernel_id: uuid.UUID + slot_name: str + + @override + @classmethod + def entity_type(cls) -> EntityType: + return EntityType.RESOURCE_ALLOCATION + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.GET + + @override + def entity_id(self) -> str | None: + return f"{self.kernel_id}:{self.slot_name}" + + +@dataclass +class GetKernelAllocationBySlotResult(BaseActionResult): + item: ResourceAllocationData + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/resource_slot/processors.py b/src/ai/backend/manager/services/resource_slot/processors.py index db836645204..970a4180e24 100644 --- a/src/ai/backend/manager/services/resource_slot/processors.py +++ b/src/ai/backend/manager/services/resource_slot/processors.py @@ -7,10 +7,14 @@ from ai.backend.manager.actions.types import AbstractProcessorPackage, ActionSpec from .actions import ( + GetAgentResourceBySlotAction, + GetAgentResourceBySlotResult, GetAgentResourcesAction, GetAgentResourcesResult, GetDomainResourceOverviewAction, GetDomainResourceOverviewResult, + GetKernelAllocationBySlotAction, + GetKernelAllocationBySlotResult, GetKernelAllocationsAction, GetKernelAllocationsResult, GetProjectResourceOverviewAction, @@ -28,7 +32,13 @@ class ResourceSlotProcessors(AbstractProcessorPackage): + get_agent_resource_by_slot: ActionProcessor[ + GetAgentResourceBySlotAction, GetAgentResourceBySlotResult + ] get_agent_resources: ActionProcessor[GetAgentResourcesAction, GetAgentResourcesResult] + get_kernel_allocation_by_slot: ActionProcessor[ + GetKernelAllocationBySlotAction, GetKernelAllocationBySlotResult + ] search_agent_resources: ActionProcessor[SearchAgentResourcesAction, SearchAgentResourcesResult] get_kernel_allocations: ActionProcessor[GetKernelAllocationsAction, GetKernelAllocationsResult] search_resource_allocations: ActionProcessor[ @@ -46,7 +56,13 @@ class ResourceSlotProcessors(AbstractProcessorPackage): ] def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMonitor]) -> None: + self.get_agent_resource_by_slot = ActionProcessor( + service.get_agent_resource_by_slot, action_monitors + ) self.get_agent_resources = ActionProcessor(service.get_agent_resources, action_monitors) + self.get_kernel_allocation_by_slot = ActionProcessor( + service.get_kernel_allocation_by_slot, action_monitors + ) self.search_agent_resources = ActionProcessor( service.search_agent_resources, action_monitors ) @@ -72,7 +88,9 @@ def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMon @override def supported_actions(self) -> list[ActionSpec]: return [ + GetAgentResourceBySlotAction.spec(), GetAgentResourcesAction.spec(), + GetKernelAllocationBySlotAction.spec(), SearchAgentResourcesAction.spec(), GetKernelAllocationsAction.spec(), SearchResourceAllocationsAction.spec(), diff --git a/src/ai/backend/manager/services/resource_slot/service.py b/src/ai/backend/manager/services/resource_slot/service.py index 0b2691588d2..bba72cd1b96 100644 --- a/src/ai/backend/manager/services/resource_slot/service.py +++ b/src/ai/backend/manager/services/resource_slot/service.py @@ -10,11 +10,19 @@ from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow from ai.backend.manager.repositories.resource_slot.repository import ResourceSlotRepository +from .actions.get_agent_resource_by_slot import ( + GetAgentResourceBySlotAction, + GetAgentResourceBySlotResult, +) from .actions.get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .actions.get_domain_resource_overview import ( GetDomainResourceOverviewAction, GetDomainResourceOverviewResult, ) +from .actions.get_kernel_allocation_by_slot import ( + GetKernelAllocationBySlotAction, + GetKernelAllocationBySlotResult, +) from .actions.get_kernel_allocations import GetKernelAllocationsAction, GetKernelAllocationsResult from .actions.get_project_resource_overview import ( GetProjectResourceOverviewAction, @@ -54,6 +62,19 @@ class ResourceSlotService: def __init__(self, repository: ResourceSlotRepository) -> None: self._repository = repository + async def get_agent_resource_by_slot( + self, action: GetAgentResourceBySlotAction + ) -> GetAgentResourceBySlotResult: + row = await self._repository.get_agent_resource_by_slot(action.agent_id, action.slot_name) + return GetAgentResourceBySlotResult( + item=AgentResourceData( + agent_id=row.agent_id, + slot_name=row.slot_name, + capacity=row.capacity, + used=row.used, + ) + ) + async def get_agent_resources(self, action: GetAgentResourcesAction) -> GetAgentResourcesResult: rows = await self._repository.get_agent_resources(action.agent_id) items = [ @@ -78,6 +99,21 @@ async def search_agent_resources( has_previous_page=result.has_previous_page, ) + async def get_kernel_allocation_by_slot( + self, action: GetKernelAllocationBySlotAction + ) -> GetKernelAllocationBySlotResult: + row = await self._repository.get_kernel_allocation_by_slot( + action.kernel_id, action.slot_name + ) + return GetKernelAllocationBySlotResult( + item=ResourceAllocationData( + kernel_id=row.kernel_id, + slot_name=row.slot_name, + requested=row.requested, + used=row.used, + ) + ) + async def get_kernel_allocations( self, action: GetKernelAllocationsAction ) -> GetKernelAllocationsResult: From 60a2e6af74e5e98941a4cbadd79133ed2284de2c Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 11:12:09 +0900 Subject: [PATCH 13/17] feat(BA-4904): honor required flag in resolve_nodes and propagate exceptions from fetchers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ResourceSlotTypeGQL.resolve_nodes: re-raise ResourceSlotTypeNotFound when required=True - AgentResourceSlotGQL.resolve_nodes: catch AgentResourceNotFound, raise if required=True - KernelResourceAllocationGQL.resolve_nodes: catch ResourceAllocationNotFound, raise if required=True - load_agent_resource_data / load_kernel_allocation_data: remove silent catch→None pattern, let domain exceptions propagate per CLAUDE.md error handling principle Co-Authored-By: Claude Sonnet 4.6 --- .../manager/api/gql/resource_slot/fetcher.py | 28 +++++++++---------- .../manager/api/gql/resource_slot/types.py | 24 +++++++++++++--- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index 9301a45c052..b98798c48c9 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -324,16 +324,16 @@ async def load_agent_resource_data( info: Info[StrawberryGQLContext], agent_id: str, slot_name: str, -) -> AgentResourceData | None: - """Load raw AgentResourceData for a single agent+slot (used by Node.resolve_nodes).""" - from ai.backend.manager.errors.resource_slot import AgentResourceNotFound +) -> AgentResourceData: + """Load raw AgentResourceData for a single agent+slot (used by Node.resolve_nodes). - try: - action_result = await info.context.processors.resource_slot.get_agent_resource_by_slot.wait_for_complete( + Raises AgentResourceNotFound if the entry does not exist. + """ + action_result = ( + await info.context.processors.resource_slot.get_agent_resource_by_slot.wait_for_complete( GetAgentResourceBySlotAction(agent_id=agent_id, slot_name=slot_name) ) - except AgentResourceNotFound: - return None + ) return action_result.item @@ -341,16 +341,16 @@ async def load_kernel_allocation_data( info: Info[StrawberryGQLContext], kernel_id_str: str, slot_name: str, -) -> ResourceAllocationData | None: - """Load raw ResourceAllocationData for a single kernel+slot (used by Node.resolve_nodes).""" - from ai.backend.manager.errors.resource_slot import ResourceAllocationNotFound +) -> ResourceAllocationData: + """Load raw ResourceAllocationData for a single kernel+slot (used by Node.resolve_nodes). - try: - action_result = await info.context.processors.resource_slot.get_kernel_allocation_by_slot.wait_for_complete( + Raises ResourceAllocationNotFound if the entry does not exist. + """ + action_result = ( + await info.context.processors.resource_slot.get_kernel_allocation_by_slot.wait_for_complete( GetKernelAllocationBySlotAction( kernel_id=_uuid.UUID(kernel_id_str), slot_name=slot_name ) ) - except ResourceAllocationNotFound: - return None + ) return action_result.item diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py index 72a4f93d61b..4166e1ab02a 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/types.py +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -96,6 +96,8 @@ async def resolve_nodes( # type: ignore[override] try: data = await load_resource_slot_type_data(info, slot_name) except ResourceSlotTypeNotFound: + if required: + raise results.append(None) else: results.append(cls.from_data(data)) @@ -240,12 +242,19 @@ async def resolve_nodes( # type: ignore[override] ) -> Iterable[Self | None]: # Node ID format: "{agent_id}:{slot_name}" from ai.backend.manager.api.gql.resource_slot.fetcher import load_agent_resource_data + from ai.backend.manager.errors.resource_slot import AgentResourceNotFound results: list[Self | None] = [] for node_id in node_ids: agent_id, _, slot_name = node_id.partition(":") - data = await load_agent_resource_data(info, agent_id, slot_name) - results.append(cls.from_data(data) if data is not None else None) + try: + data = await load_agent_resource_data(info, agent_id, slot_name) + except AgentResourceNotFound: + if required: + raise + results.append(None) + else: + results.append(cls.from_data(data)) return results @classmethod @@ -308,12 +317,19 @@ async def resolve_nodes( # type: ignore[override] ) -> Iterable[Self | None]: # Node ID format: "{kernel_id}:{slot_name}" from ai.backend.manager.api.gql.resource_slot.fetcher import load_kernel_allocation_data + from ai.backend.manager.errors.resource_slot import ResourceAllocationNotFound results: list[Self | None] = [] for node_id in node_ids: kernel_id_str, _, slot_name = node_id.partition(":") - data = await load_kernel_allocation_data(info, kernel_id_str, slot_name) - results.append(cls.from_data(data) if data is not None else None) + try: + data = await load_kernel_allocation_data(info, kernel_id_str, slot_name) + except ResourceAllocationNotFound: + if required: + raise + results.append(None) + else: + results.append(cls.from_data(data)) return results @classmethod From 14ad692123a3e7d1754247f697c5185d75e1b6bd Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 11:58:55 +0900 Subject: [PATCH 14/17] fix(BA-4904): address remaining PR review feedback - Add pagination args (first/after/last/before/limit/offset) to AgentV2GQL.resource_slots and KernelV2GQL.resource_allocations field resolvers so clients can paginate these connections - Delete unused AllSlotTypesAction service action file - Remove dead fetch_agent_resource_slot and fetch_kernel_resource_allocation functions from fetcher.py - Simplify load_resource_slot_type_data: return action_result.item directly instead of redundantly reconstructing ResourceSlotTypeData Co-Authored-By: Claude Sonnet 4.6 --- src/ai/backend/manager/api/gql/agent/types.py | 17 ++++++- .../backend/manager/api/gql/kernel/types.py | 17 ++++++- .../manager/api/gql/resource_slot/fetcher.py | 47 +------------------ .../resource_slot/actions/all_slot_types.py | 37 --------------- 4 files changed, 33 insertions(+), 85 deletions(-) delete mode 100644 src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py diff --git a/src/ai/backend/manager/api/gql/agent/types.py b/src/ai/backend/manager/api/gql/agent/types.py index cad67fbbc0e..da63a692008 100644 --- a/src/ai/backend/manager/api/gql/agent/types.py +++ b/src/ai/backend/manager/api/gql/agent/types.py @@ -496,6 +496,12 @@ async def sessions( async def resource_slots( self, info: Info[StrawberryGQLContext], + first: int | None = None, + after: str | None = None, + last: int | None = None, + before: str | None = None, + limit: int | None = None, + offset: int | None = None, ) -> Annotated[ AgentResourceConnectionGQL, strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), @@ -503,7 +509,16 @@ async def resource_slots( """Fetch per-slot resource capacity and usage for this agent.""" from ai.backend.manager.api.gql.resource_slot.fetcher import fetch_agent_resources - return await fetch_agent_resources(info=info, agent_id=str(self._agent_id)) + return await fetch_agent_resources( + info=info, + agent_id=str(self._agent_id), + first=first, + after=after, + last=last, + before=before, + limit=limit, + offset=offset, + ) @classmethod async def resolve_nodes( # type: ignore[override] # Strawberry Node uses AwaitableOrValue overloads incompatible with async def diff --git a/src/ai/backend/manager/api/gql/kernel/types.py b/src/ai/backend/manager/api/gql/kernel/types.py index 52c1eb31ee1..82ee0684d5c 100644 --- a/src/ai/backend/manager/api/gql/kernel/types.py +++ b/src/ai/backend/manager/api/gql/kernel/types.py @@ -471,6 +471,12 @@ async def session( async def resource_allocations( self, info: Info[StrawberryGQLContext], + first: int | None = None, + after: str | None = None, + last: int | None = None, + before: str | None = None, + limit: int | None = None, + offset: int | None = None, ) -> Annotated[ ResourceAllocationConnectionGQL, strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), @@ -478,7 +484,16 @@ async def resource_allocations( """Fetch per-slot resource allocation for this kernel.""" from ai.backend.manager.api.gql.resource_slot.fetcher import fetch_kernel_allocations - return await fetch_kernel_allocations(info=info, kernel_id=str(self.id)) + return await fetch_kernel_allocations( + info=info, + kernel_id=str(self.id), + first=first, + after=after, + last=last, + before=before, + limit=limit, + offset=offset, + ) @classmethod async def resolve_nodes( # type: ignore[override] # Strawberry Node uses AwaitableOrValue overloads incompatible with async def diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index b98798c48c9..2dae12f88f3 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -209,23 +209,6 @@ async def fetch_agent_resources( ) -async def fetch_agent_resource_slot( - info: Info[StrawberryGQLContext], - agent_id: str, - slot_name: str, -) -> AgentResourceSlotGQL | None: - """Fetch a single per-slot resource entry for an agent (used by Node resolution).""" - from ai.backend.manager.errors.resource_slot import AgentResourceNotFound - - try: - action_result = await info.context.processors.resource_slot.get_agent_resource_by_slot.wait_for_complete( - GetAgentResourceBySlotAction(agent_id=agent_id, slot_name=slot_name) - ) - except AgentResourceNotFound: - return None - return AgentResourceSlotGQL.from_data(action_result.item) - - async def fetch_kernel_allocations( info: Info[StrawberryGQLContext], kernel_id: str, @@ -274,25 +257,6 @@ async def fetch_kernel_allocations( ) -async def fetch_kernel_resource_allocation( - info: Info[StrawberryGQLContext], - kernel_id_str: str, - slot_name: str, -) -> KernelResourceAllocationGQL | None: - """Fetch a single per-slot allocation for a kernel (used by Node resolution).""" - from ai.backend.manager.errors.resource_slot import ResourceAllocationNotFound - - try: - action_result = await info.context.processors.resource_slot.get_kernel_allocation_by_slot.wait_for_complete( - GetKernelAllocationBySlotAction( - kernel_id=_uuid.UUID(kernel_id_str), slot_name=slot_name - ) - ) - except ResourceAllocationNotFound: - return None - return KernelResourceAllocationGQL.from_data(action_result.item) - - # ========== Raw data helpers for Node.resolve_nodes ========== # These return raw data types so that resolve_nodes can call cls.from_data(), # which enables mypy to correctly infer the return type as Iterable[Self | None]. @@ -308,16 +272,7 @@ async def load_resource_slot_type_data( GetResourceSlotTypeAction(slot_name=slot_name) ) ) - return ResourceSlotTypeData( - slot_name=action_result.item.slot_name, - slot_type=action_result.item.slot_type, - display_name=action_result.item.display_name, - description=action_result.item.description, - display_unit=action_result.item.display_unit, - display_icon=action_result.item.display_icon, - number_format=action_result.item.number_format, - rank=action_result.item.rank, - ) + return action_result.item async def load_agent_resource_data( diff --git a/src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py b/src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py deleted file mode 100644 index 581d61d847c..00000000000 --- a/src/ai/backend/manager/services/resource_slot/actions/all_slot_types.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import override - -from ai.backend.common.data.permission.types import EntityType -from ai.backend.manager.actions.action import BaseActionResult -from ai.backend.manager.actions.types import ActionOperationType -from ai.backend.manager.data.resource_slot.types import ResourceSlotTypeData - -from .base import ResourceSlotAction - - -@dataclass -class AllSlotTypesAction(ResourceSlotAction): - @override - @classmethod - def entity_type(cls) -> EntityType: - return EntityType.RESOURCE_SLOT_TYPE - - @override - @classmethod - def operation_type(cls) -> ActionOperationType: - return ActionOperationType.SEARCH - - @override - def entity_id(self) -> str | None: - return None - - -@dataclass -class AllSlotTypesResult(BaseActionResult): - items: list[ResourceSlotTypeData] - - @override - def entity_id(self) -> str | None: - return None From 2100a2af6ee967c50d83bd1749ae4c149abe8b00 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 03:04:18 +0000 Subject: [PATCH 15/17] chore: update api schema dump Co-authored-by: octodog --- docs/manager/graphql-reference/supergraph.graphql | 4 ++-- docs/manager/graphql-reference/v2-schema.graphql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 595880e1f39..6b67e9b8cac 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -573,7 +573,7 @@ type AgentV2 implements Node sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! """Added in 26.4.0. Per-slot resource capacity and usage for this agent.""" - resourceSlots: AgentResourceConnection! + resourceSlots(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! } """ @@ -5574,7 +5574,7 @@ type KernelV2 implements Node session: SessionV2 """Added in 26.3.0. Per-slot resource allocation for this kernel.""" - resourceAllocations: ResourceAllocationConnection! + resourceAllocations(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! } """ diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index da41a44c607..540b32906c7 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -362,7 +362,7 @@ type AgentV2 implements Node { sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! """Added in 26.4.0. Per-slot resource capacity and usage for this agent.""" - resourceSlots: AgentResourceConnection! + resourceSlots(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! } """ @@ -3132,7 +3132,7 @@ type KernelV2 implements Node { session: SessionV2 """Added in 26.3.0. Per-slot resource allocation for this kernel.""" - resourceAllocations: ResourceAllocationConnection! + resourceAllocations(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! } """ From 610321ddd03f5101d224531060978e8f877bc428 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 13:53:19 +0900 Subject: [PATCH 16/17] Add filter/order support for AgentResourceSlot and KernelResourceAllocation Conform nested connections to the existing scope/filter/order pattern: - AgentResourceSlotFilterGQL: filter by slot_name - AgentResourceSlotOrderByGQL: order by slot_name, capacity, used - KernelResourceAllocationFilterGQL: filter by slot_name - KernelResourceAllocationOrderByGQL: order by slot_name, requested, used Update fetcher functions and nested connection resolvers in AgentV2GQL and KernelV2GQL to accept and pass filter/order_by parameters. Co-Authored-By: Claude Opus 4.6 --- src/ai/backend/manager/api/gql/agent/types.py | 20 ++- .../backend/manager/api/gql/kernel/types.py | 20 ++- .../manager/api/gql/resource_slot/fetcher.py | 16 ++- .../manager/api/gql/resource_slot/types.py | 121 ++++++++++++++++- .../repositories/resource_slot/query.py | 128 ++++++++++++++++++ 5 files changed, 300 insertions(+), 5 deletions(-) diff --git a/src/ai/backend/manager/api/gql/agent/types.py b/src/ai/backend/manager/api/gql/agent/types.py index da63a692008..3fb44d5662e 100644 --- a/src/ai/backend/manager/api/gql/agent/types.py +++ b/src/ai/backend/manager/api/gql/agent/types.py @@ -20,7 +20,11 @@ KernelV2FilterGQL, KernelV2OrderByGQL, ) - from ai.backend.manager.api.gql.resource_slot.types import AgentResourceConnectionGQL + from ai.backend.manager.api.gql.resource_slot.types import ( + AgentResourceConnectionGQL, + AgentResourceSlotFilterGQL, + AgentResourceSlotOrderByGQL, + ) from ai.backend.manager.api.gql.session.types import ( SessionV2ConnectionGQL, SessionV2FilterGQL, @@ -496,6 +500,18 @@ async def sessions( async def resource_slots( self, info: Info[StrawberryGQLContext], + filter: Annotated[ + AgentResourceSlotFilterGQL, + strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), + ] + | None = None, + order_by: list[ + Annotated[ + AgentResourceSlotOrderByGQL, + strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), + ] + ] + | None = None, first: int | None = None, after: str | None = None, last: int | None = None, @@ -512,6 +528,8 @@ async def resource_slots( return await fetch_agent_resources( info=info, agent_id=str(self._agent_id), + filter=filter, + order_by=order_by, first=first, after=after, last=last, diff --git a/src/ai/backend/manager/api/gql/kernel/types.py b/src/ai/backend/manager/api/gql/kernel/types.py index 82ee0684d5c..d88e86271a7 100644 --- a/src/ai/backend/manager/api/gql/kernel/types.py +++ b/src/ai/backend/manager/api/gql/kernel/types.py @@ -17,7 +17,11 @@ from ai.backend.manager.api.gql.base import OrderDirection, UUIDFilter if TYPE_CHECKING: - from ai.backend.manager.api.gql.resource_slot.types import ResourceAllocationConnectionGQL + from ai.backend.manager.api.gql.resource_slot.types import ( + KernelResourceAllocationFilterGQL, + KernelResourceAllocationOrderByGQL, + ResourceAllocationConnectionGQL, + ) from ai.backend.manager.api.gql.session.types import SessionV2GQL from ai.backend.manager.repositories.base import QueryCondition @@ -471,6 +475,18 @@ async def session( async def resource_allocations( self, info: Info[StrawberryGQLContext], + filter: Annotated[ + KernelResourceAllocationFilterGQL, + strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), + ] + | None = None, + order_by: list[ + Annotated[ + KernelResourceAllocationOrderByGQL, + strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"), + ] + ] + | None = None, first: int | None = None, after: str | None = None, last: int | None = None, @@ -487,6 +503,8 @@ async def resource_allocations( return await fetch_kernel_allocations( info=info, kernel_id=str(self.id), + filter=filter, + order_by=order_by, first=first, after=after, last=last, diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py index 2dae12f88f3..ea93c112382 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -56,9 +56,13 @@ from .types import ( AgentResourceConnectionGQL, AgentResourceSlotEdgeGQL, + AgentResourceSlotFilterGQL, AgentResourceSlotGQL, + AgentResourceSlotOrderByGQL, KernelResourceAllocationEdgeGQL, + KernelResourceAllocationFilterGQL, KernelResourceAllocationGQL, + KernelResourceAllocationOrderByGQL, ResourceAllocationConnectionGQL, ResourceSlotTypeConnectionGQL, ResourceSlotTypeEdgeGQL, @@ -164,6 +168,8 @@ async def fetch_resource_slot_type( async def fetch_agent_resources( info: Info[StrawberryGQLContext], agent_id: str, + filter: AgentResourceSlotFilterGQL | None = None, + order_by: list[AgentResourceSlotOrderByGQL] | None = None, before: str | None = None, after: str | None = None, first: int | None = None, @@ -171,7 +177,7 @@ async def fetch_agent_resources( limit: int | None = None, offset: int | None = None, ) -> AgentResourceConnectionGQL: - """Fetch per-slot resource entries for a given agent with pagination.""" + """Fetch per-slot resource entries for a given agent with pagination and filtering.""" querier = info.context.gql_adapter.build_querier( PaginationOptions( first=first, @@ -182,6 +188,8 @@ async def fetch_agent_resources( offset=offset, ), pagination_spec=_get_agent_resource_pagination_spec(), + filter=filter, + order_by=order_by, base_conditions=[AgentResourceQueryConditions.by_agent_id(agent_id)], ) @@ -212,6 +220,8 @@ async def fetch_agent_resources( async def fetch_kernel_allocations( info: Info[StrawberryGQLContext], kernel_id: str, + filter: KernelResourceAllocationFilterGQL | None = None, + order_by: list[KernelResourceAllocationOrderByGQL] | None = None, before: str | None = None, after: str | None = None, first: int | None = None, @@ -219,7 +229,7 @@ async def fetch_kernel_allocations( limit: int | None = None, offset: int | None = None, ) -> ResourceAllocationConnectionGQL: - """Fetch per-slot allocation entries for a kernel with pagination.""" + """Fetch per-slot allocation entries for a kernel with pagination and filtering.""" querier = info.context.gql_adapter.build_querier( PaginationOptions( first=first, @@ -230,6 +240,8 @@ async def fetch_kernel_allocations( offset=offset, ), pagination_spec=_get_resource_allocation_pagination_spec(), + filter=filter, + order_by=order_by, base_conditions=[ResourceAllocationQueryConditions.by_kernel_id(_uuid.UUID(kernel_id))], ) diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py index 4166e1ab02a..52387317162 100644 --- a/src/ai/backend/manager/api/gql/resource_slot/types.py +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -27,7 +27,14 @@ ResourceSlotTypeData, ) from ai.backend.manager.repositories.base import QueryCondition, QueryOrder -from ai.backend.manager.repositories.resource_slot.query import QueryConditions, QueryOrders +from ai.backend.manager.repositories.resource_slot.query import ( + AgentResourceQueryConditions, + AgentResourceQueryOrders, + QueryConditions, + QueryOrders, + ResourceAllocationQueryConditions, + ResourceAllocationQueryOrders, +) # ========== NumberFormat ========== @@ -283,6 +290,61 @@ def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: self.count = count +# ========== AgentResourceSlot Filter/OrderBy ========== + + +@strawberry.enum( + name="AgentResourceSlotOrderField", + description="Added in 26.3.0. Fields available for ordering agent resource slots.", +) +class AgentResourceSlotOrderFieldGQL(StrEnum): + SLOT_NAME = "slot_name" + CAPACITY = "capacity" + USED = "used" + + +@strawberry.input( + name="AgentResourceSlotFilter", + description="Added in 26.3.0. Filter criteria for querying agent resource slots.", +) +class AgentResourceSlotFilterGQL(GQLFilter): + slot_name: StringFilter | None = None + + def build_conditions(self) -> list[QueryCondition]: + conditions: list[QueryCondition] = [] + if self.slot_name: + condition = self.slot_name.build_query_condition( + contains_factory=AgentResourceQueryConditions.by_slot_name_contains, + equals_factory=AgentResourceQueryConditions.by_slot_name_equals, + starts_with_factory=AgentResourceQueryConditions.by_slot_name_starts_with, + ends_with_factory=AgentResourceQueryConditions.by_slot_name_ends_with, + ) + if condition: + conditions.append(condition) + return conditions + + +@strawberry.input( + name="AgentResourceSlotOrderBy", + description="Added in 26.3.0. Ordering specification for agent resource slots.", +) +class AgentResourceSlotOrderByGQL(GQLOrderBy): + field: AgentResourceSlotOrderFieldGQL + direction: OrderDirection = OrderDirection.ASC + + def to_query_order(self) -> QueryOrder: + ascending = self.direction == OrderDirection.ASC + match self.field: + case AgentResourceSlotOrderFieldGQL.SLOT_NAME: + return AgentResourceQueryOrders.slot_name(ascending) + case AgentResourceSlotOrderFieldGQL.CAPACITY: + return AgentResourceQueryOrders.capacity(ascending) + case AgentResourceSlotOrderFieldGQL.USED: + return AgentResourceQueryOrders.used(ascending) + case _: + raise ValueError(f"Unhandled AgentResourceSlotOrderFieldGQL value: {self.field!r}") + + # ========== KernelResourceAllocationGQL (Node) ========== @@ -343,6 +405,63 @@ def from_data(cls, data: ResourceAllocationData) -> Self: ) +# ========== KernelResourceAllocation Filter/OrderBy ========== + + +@strawberry.enum( + name="KernelResourceAllocationOrderField", + description="Added in 26.3.0. Fields available for ordering kernel resource allocations.", +) +class KernelResourceAllocationOrderFieldGQL(StrEnum): + SLOT_NAME = "slot_name" + REQUESTED = "requested" + USED = "used" + + +@strawberry.input( + name="KernelResourceAllocationFilter", + description="Added in 26.3.0. Filter criteria for querying kernel resource allocations.", +) +class KernelResourceAllocationFilterGQL(GQLFilter): + slot_name: StringFilter | None = None + + def build_conditions(self) -> list[QueryCondition]: + conditions: list[QueryCondition] = [] + if self.slot_name: + condition = self.slot_name.build_query_condition( + contains_factory=ResourceAllocationQueryConditions.by_slot_name_contains, + equals_factory=ResourceAllocationQueryConditions.by_slot_name_equals, + starts_with_factory=ResourceAllocationQueryConditions.by_slot_name_starts_with, + ends_with_factory=ResourceAllocationQueryConditions.by_slot_name_ends_with, + ) + if condition: + conditions.append(condition) + return conditions + + +@strawberry.input( + name="KernelResourceAllocationOrderBy", + description="Added in 26.3.0. Ordering specification for kernel resource allocations.", +) +class KernelResourceAllocationOrderByGQL(GQLOrderBy): + field: KernelResourceAllocationOrderFieldGQL + direction: OrderDirection = OrderDirection.ASC + + def to_query_order(self) -> QueryOrder: + ascending = self.direction == OrderDirection.ASC + match self.field: + case KernelResourceAllocationOrderFieldGQL.SLOT_NAME: + return ResourceAllocationQueryOrders.slot_name(ascending) + case KernelResourceAllocationOrderFieldGQL.REQUESTED: + return ResourceAllocationQueryOrders.requested(ascending) + case KernelResourceAllocationOrderFieldGQL.USED: + return ResourceAllocationQueryOrders.used(ascending) + case _: + raise ValueError( + f"Unhandled KernelResourceAllocationOrderFieldGQL value: {self.field!r}" + ) + + KernelResourceAllocationEdgeGQL = Edge[KernelResourceAllocationGQL] diff --git a/src/ai/backend/manager/repositories/resource_slot/query.py b/src/ai/backend/manager/repositories/resource_slot/query.py index 6a88a41a38f..6a6a9771e4e 100644 --- a/src/ai/backend/manager/repositories/resource_slot/query.py +++ b/src/ai/backend/manager/repositories/resource_slot/query.py @@ -219,6 +219,58 @@ def inner() -> sa.sql.expression.ColumnElement[bool]: return inner + @staticmethod + def by_slot_name_contains(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = AgentResourceRow.slot_name.ilike(f"%{spec.value}%") + else: + condition = AgentResourceRow.slot_name.like(f"%{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_equals(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = sa.func.lower(AgentResourceRow.slot_name) == spec.value.lower() + else: + condition = AgentResourceRow.slot_name == spec.value + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_starts_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = AgentResourceRow.slot_name.ilike(f"{spec.value}%") + else: + condition = AgentResourceRow.slot_name.like(f"{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_ends_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = AgentResourceRow.slot_name.ilike(f"%{spec.value}") + else: + condition = AgentResourceRow.slot_name.like(f"%{spec.value}") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + @staticmethod def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: def inner() -> sa.sql.expression.ColumnElement[bool]: @@ -241,6 +293,18 @@ def slot_name(ascending: bool = True) -> QueryOrder: return AgentResourceRow.slot_name.asc() return AgentResourceRow.slot_name.desc() + @staticmethod + def capacity(ascending: bool = True) -> QueryOrder: + if ascending: + return AgentResourceRow.capacity.asc() + return AgentResourceRow.capacity.desc() + + @staticmethod + def used(ascending: bool = True) -> QueryOrder: + if ascending: + return AgentResourceRow.used.asc() + return AgentResourceRow.used.desc() + class ResourceAllocationQueryConditions: @staticmethod @@ -250,6 +314,58 @@ def inner() -> sa.sql.expression.ColumnElement[bool]: return inner + @staticmethod + def by_slot_name_contains(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = ResourceAllocationRow.slot_name.ilike(f"%{spec.value}%") + else: + condition = ResourceAllocationRow.slot_name.like(f"%{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_equals(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = sa.func.lower(ResourceAllocationRow.slot_name) == spec.value.lower() + else: + condition = ResourceAllocationRow.slot_name == spec.value + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_starts_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = ResourceAllocationRow.slot_name.ilike(f"{spec.value}%") + else: + condition = ResourceAllocationRow.slot_name.like(f"{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_ends_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = ResourceAllocationRow.slot_name.ilike(f"%{spec.value}") + else: + condition = ResourceAllocationRow.slot_name.like(f"%{spec.value}") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + @staticmethod def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: def inner() -> sa.sql.expression.ColumnElement[bool]: @@ -271,3 +387,15 @@ def slot_name(ascending: bool = True) -> QueryOrder: if ascending: return ResourceAllocationRow.slot_name.asc() return ResourceAllocationRow.slot_name.desc() + + @staticmethod + def requested(ascending: bool = True) -> QueryOrder: + if ascending: + return ResourceAllocationRow.requested.asc() + return ResourceAllocationRow.requested.desc() + + @staticmethod + def used(ascending: bool = True) -> QueryOrder: + if ascending: + return ResourceAllocationRow.used.asc() + return ResourceAllocationRow.used.desc() From 7ffe35ea99375dd9c4fc01844f90b8e24c8ba8c7 Mon Sep 17 00:00:00 2001 From: HyeockJinKim Date: Fri, 6 Mar 2026 07:18:17 +0000 Subject: [PATCH 17/17] chore: update api schema dump Co-authored-by: octodog --- .../graphql-reference/supergraph.graphql | 60 ++++++++++++++++++- .../graphql-reference/v2-schema.graphql | 48 ++++++++++++++- 2 files changed, 102 insertions(+), 6 deletions(-) diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 6b67e9b8cac..e6efdebee4d 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -385,6 +385,30 @@ type AgentResourceSlotEdge node: AgentResourceSlot! } +"""Added in 26.3.0. Filter criteria for querying agent resource slots.""" +input AgentResourceSlotFilter + @join__type(graph: STRAWBERRY) +{ + slotName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for agent resource slots.""" +input AgentResourceSlotOrderBy + @join__type(graph: STRAWBERRY) +{ + field: AgentResourceSlotOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering agent resource slots.""" +enum AgentResourceSlotOrderField + @join__type(graph: STRAWBERRY) +{ + SLOT_NAME @join__enumValue(graph: STRAWBERRY) + CAPACITY @join__enumValue(graph: STRAWBERRY) + USED @join__enumValue(graph: STRAWBERRY) +} + """Added in 25.15.0""" type AgentStats @join__type(graph: STRAWBERRY) @@ -572,8 +596,8 @@ type AgentV2 implements Node """ sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! - """Added in 26.4.0. Per-slot resource capacity and usage for this agent.""" - resourceSlots(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! + """Added in 26.3.0. Per-slot resource capacity and usage for this agent.""" + resourceSlots(filter: AgentResourceSlotFilter = null, orderBy: [AgentResourceSlotOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! } """ @@ -5524,6 +5548,36 @@ type KernelResourceAllocationEdge node: KernelResourceAllocation! } +""" +Added in 26.3.0. Filter criteria for querying kernel resource allocations. +""" +input KernelResourceAllocationFilter + @join__type(graph: STRAWBERRY) +{ + slotName: StringFilter = null +} + +""" +Added in 26.3.0. Ordering specification for kernel resource allocations. +""" +input KernelResourceAllocationOrderBy + @join__type(graph: STRAWBERRY) +{ + field: KernelResourceAllocationOrderField! + direction: OrderDirection! = ASC +} + +""" +Added in 26.3.0. Fields available for ordering kernel resource allocations. +""" +enum KernelResourceAllocationOrderField + @join__type(graph: STRAWBERRY) +{ + SLOT_NAME @join__enumValue(graph: STRAWBERRY) + REQUESTED @join__enumValue(graph: STRAWBERRY) + USED @join__enumValue(graph: STRAWBERRY) +} + """ Added in 26.2.0. Represents a kernel (compute container) in Backend.AI. """ @@ -5574,7 +5628,7 @@ type KernelV2 implements Node session: SessionV2 """Added in 26.3.0. Per-slot resource allocation for this kernel.""" - resourceAllocations(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! + resourceAllocations(filter: KernelResourceAllocationFilter = null, orderBy: [KernelResourceAllocationOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! } """ diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index 540b32906c7..18f393bea29 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -209,6 +209,24 @@ type AgentResourceSlotEdge { node: AgentResourceSlot! } +"""Added in 26.3.0. Filter criteria for querying agent resource slots.""" +input AgentResourceSlotFilter { + slotName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for agent resource slots.""" +input AgentResourceSlotOrderBy { + field: AgentResourceSlotOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering agent resource slots.""" +enum AgentResourceSlotOrderField { + SLOT_NAME + CAPACITY + USED +} + """Added in 25.15.0""" type AgentStats { """Added in 25.15.0""" @@ -361,8 +379,8 @@ type AgentV2 implements Node { """ sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! - """Added in 26.4.0. Per-slot resource capacity and usage for this agent.""" - resourceSlots(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! + """Added in 26.3.0. Per-slot resource capacity and usage for this agent.""" + resourceSlots(filter: AgentResourceSlotFilter = null, orderBy: [AgentResourceSlotOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! } """ @@ -3085,6 +3103,30 @@ type KernelResourceAllocationEdge { node: KernelResourceAllocation! } +""" +Added in 26.3.0. Filter criteria for querying kernel resource allocations. +""" +input KernelResourceAllocationFilter { + slotName: StringFilter = null +} + +""" +Added in 26.3.0. Ordering specification for kernel resource allocations. +""" +input KernelResourceAllocationOrderBy { + field: KernelResourceAllocationOrderField! + direction: OrderDirection! = ASC +} + +""" +Added in 26.3.0. Fields available for ordering kernel resource allocations. +""" +enum KernelResourceAllocationOrderField { + SLOT_NAME + REQUESTED + USED +} + """ Added in 26.2.0. Represents a kernel (compute container) in Backend.AI. """ @@ -3132,7 +3174,7 @@ type KernelV2 implements Node { session: SessionV2 """Added in 26.3.0. Per-slot resource allocation for this kernel.""" - resourceAllocations(first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! + resourceAllocations(filter: KernelResourceAllocationFilter = null, orderBy: [KernelResourceAllocationOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! } """