diff --git a/changes/9708.feature.md b/changes/9708.feature.md new file mode 100644 index 00000000000..fdc7e022732 --- /dev/null +++ b/changes/9708.feature.md @@ -0,0 +1 @@ +Add GraphQL ResourceSlotTypeGQL node with root queries (resource_slot_type, resource_slot_types) and relay connections on AgentV2GQL (resource_slots) and KernelV2GQL (resource_allocations). diff --git a/docs/manager/graphql-reference/supergraph.graphql b/docs/manager/graphql-reference/supergraph.graphql index 2c23c80eb3b..e6efdebee4d 100644 --- a/docs/manager/graphql-reference/supergraph.graphql +++ b/docs/manager/graphql-reference/supergraph.graphql @@ -339,6 +339,76 @@ type AgentResource free: JSON! } +"""Added in 26.3.0. Relay-style connection for per-slot agent resources.""" +type AgentResourceConnection + @join__type(graph: STRAWBERRY) +{ + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [AgentResourceSlotEdge!]! + count: Int! +} + +""" +Added in 26.3.0. Per-slot resource capacity and usage entry for an agent. +Represents one row from the agent_resources table. +""" +type AgentResourceSlot implements Node + @join__implements(graph: STRAWBERRY, interface: "Node") + @join__type(graph: STRAWBERRY) +{ + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Total hardware resource capacity for this slot on the agent.""" + capacity: Decimal! + + """ + Amount of this slot currently consumed by running and scheduled sessions. + """ + used: Decimal! +} + +"""An edge in a connection.""" +type AgentResourceSlotEdge + @join__type(graph: STRAWBERRY) +{ + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: AgentResourceSlot! +} + +"""Added in 26.3.0. 
Filter criteria for querying agent resource slots.""" +input AgentResourceSlotFilter + @join__type(graph: STRAWBERRY) +{ + slotName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for agent resource slots.""" +input AgentResourceSlotOrderBy + @join__type(graph: STRAWBERRY) +{ + field: AgentResourceSlotOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering agent resource slots.""" +enum AgentResourceSlotOrderField + @join__type(graph: STRAWBERRY) +{ + SLOT_NAME @join__enumValue(graph: STRAWBERRY) + CAPACITY @join__enumValue(graph: STRAWBERRY) + USED @join__enumValue(graph: STRAWBERRY) +} + """Added in 25.15.0""" type AgentStats @join__type(graph: STRAWBERRY) @@ -525,6 +595,9 @@ type AgentV2 implements Node Added in 26.3.0. List of sessions running on this agent with pagination support. """ sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! + + """Added in 26.3.0. Per-slot resource capacity and usage for this agent.""" + resourceSlots(filter: AgentResourceSlotFilter = null, orderBy: [AgentResourceSlotOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! } """ @@ -5443,6 +5516,68 @@ type KernelNode implements Node preopen_ports: [Int] } +""" +Added in 26.3.0. Per-slot resource allocation entry for a kernel. +Represents one row from the resource_allocations table. +""" +type KernelResourceAllocation implements Node + @join__implements(graph: STRAWBERRY, interface: "Node") + @join__type(graph: STRAWBERRY) +{ + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! 
+ + """Amount of this resource slot originally requested for the kernel.""" + requested: Decimal! + + """Amount currently used. May be null if not yet measured.""" + used: Decimal +} + +"""An edge in a connection.""" +type KernelResourceAllocationEdge + @join__type(graph: STRAWBERRY) +{ + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: KernelResourceAllocation! +} + +""" +Added in 26.3.0. Filter criteria for querying kernel resource allocations. +""" +input KernelResourceAllocationFilter + @join__type(graph: STRAWBERRY) +{ + slotName: StringFilter = null +} + +""" +Added in 26.3.0. Ordering specification for kernel resource allocations. +""" +input KernelResourceAllocationOrderBy + @join__type(graph: STRAWBERRY) +{ + field: KernelResourceAllocationOrderField! + direction: OrderDirection! = ASC +} + +""" +Added in 26.3.0. Fields available for ordering kernel resource allocations. +""" +enum KernelResourceAllocationOrderField + @join__type(graph: STRAWBERRY) +{ + SLOT_NAME @join__enumValue(graph: STRAWBERRY) + REQUESTED @join__enumValue(graph: STRAWBERRY) + USED @join__enumValue(graph: STRAWBERRY) +} + """ Added in 26.2.0. Represents a kernel (compute container) in Backend.AI. """ @@ -5491,6 +5626,9 @@ type KernelV2 implements Node """Added in 26.3.0. The session this kernel belongs to.""" session: SessionV2 + + """Added in 26.3.0. Per-slot resource allocation for this kernel.""" + resourceAllocations(filter: KernelResourceAllocationFilter = null, orderBy: [KernelResourceAllocationOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! } """ @@ -7740,6 +7878,21 @@ enum NotificationRuleType ENDPOINT_LIFECYCLE_CHANGED @join__enumValue(graph: STRAWBERRY) } +""" +Added in 26.3.0. Display number format configuration for a resource slot type. 
+""" +type NumberFormat + @join__type(graph: STRAWBERRY) +{ + """ + Whether to use binary (1024-based) prefix instead of decimal (1000-based). + """ + binary: Boolean! + + """Number of decimal places to display.""" + roundLength: Int! +} + """Added in 25.14.0""" type ObjectStorage implements Node @join__implements(graph: STRAWBERRY, interface: "Node") @@ -9329,6 +9482,16 @@ type Query """ adminSessionsV2(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! @join__field(graph: STRAWBERRY) + """ + Added in 26.3.0. Returns a single resource slot type by slot_name, or null. + """ + resourceSlotType(slotName: String!): ResourceSlotType @join__field(graph: STRAWBERRY) + + """ + Added in 26.3.0. Returns resource slot types with pagination and filtering. + """ + resourceSlotTypes(filter: ResourceSlotTypeFilter = null, orderBy: [ResourceSlotTypeOrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): ResourceSlotTypeConnection! @join__field(graph: STRAWBERRY) + """ Added in 26.2.0. @@ -9829,6 +9992,20 @@ type ResourceAllocation used: ResourceSlot } +""" +Added in 26.3.0. Relay-style connection for per-slot kernel resource allocations. +""" +type ResourceAllocationConnection + @join__type(graph: STRAWBERRY) +{ + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [KernelResourceAllocationEdge!]! + count: Int! +} + type ResourceConfig @join__type(graph: STRAWBERRY) { @@ -10185,6 +10362,97 @@ input ResourceSlotInput entries: [ResourceSlotEntryInput!]! } +""" +Added in 26.3.0. A registered resource slot type describing display metadata +and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). 
+""" +type ResourceSlotType implements Node + @join__implements(graph: STRAWBERRY, interface: "Node") + @join__type(graph: STRAWBERRY) +{ + """The Globally Unique ID of this object""" + id: ID! + + """ + Unique identifier for the resource slot (e.g., 'cpu', 'mem', 'cuda.device'). + """ + slotName: String! + + """Category of the slot type (e.g., 'count', 'bytes', 'unique-count').""" + slotType: String! + + """Human-readable name for display in UIs.""" + displayName: String! + + """Longer description of what this resource slot represents.""" + description: String! + + """ + Unit label used when displaying resource amounts (e.g., 'GiB', 'cores'). + """ + displayUnit: String! + + """Icon identifier for UI rendering (e.g., 'cpu', 'memory', 'gpu').""" + displayIcon: String! + + """Number formatting rules (binary vs decimal prefix, rounding).""" + numberFormat: NumberFormat! + + """Display ordering rank. Lower values appear first.""" + rank: Int! +} + +""" +Added in 26.3.0. Relay-style connection for paginated resource slot types. +""" +type ResourceSlotTypeConnection + @join__type(graph: STRAWBERRY) +{ + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [ResourceSlotTypeEdge!]! + count: Int! +} + +"""An edge in a connection.""" +type ResourceSlotTypeEdge + @join__type(graph: STRAWBERRY) +{ + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: ResourceSlotType! +} + +"""Added in 26.3.0. Filter criteria for querying resource slot types.""" +input ResourceSlotTypeFilter + @join__type(graph: STRAWBERRY) +{ + slotName: StringFilter = null + slotType: StringFilter = null + displayName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for resource slot types.""" +input ResourceSlotTypeOrderBy + @join__type(graph: STRAWBERRY) +{ + field: ResourceSlotTypeOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. 
Fields available for ordering resource slot types.""" +enum ResourceSlotTypeOrderField + @join__type(graph: STRAWBERRY) +{ + SLOT_NAME @join__enumValue(graph: STRAWBERRY) + RANK @join__enumValue(graph: STRAWBERRY) + DISPLAY_NAME @join__enumValue(graph: STRAWBERRY) +} + """ Added in 26.2.0. Resource weight with default indicator. Shows whether this resource type's weight was explicitly set or uses default. """ diff --git a/docs/manager/graphql-reference/v2-schema.graphql b/docs/manager/graphql-reference/v2-schema.graphql index 3a7d9e6167d..18f393bea29 100644 --- a/docs/manager/graphql-reference/v2-schema.graphql +++ b/docs/manager/graphql-reference/v2-schema.graphql @@ -170,6 +170,63 @@ type AgentResource { free: JSON! } +"""Added in 26.3.0. Relay-style connection for per-slot agent resources.""" +type AgentResourceConnection { + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [AgentResourceSlotEdge!]! + count: Int! +} + +""" +Added in 26.3.0. Per-slot resource capacity and usage entry for an agent. +Represents one row from the agent_resources table. +""" +type AgentResourceSlot implements Node { + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Total hardware resource capacity for this slot on the agent.""" + capacity: Decimal! + + """ + Amount of this slot currently consumed by running and scheduled sessions. + """ + used: Decimal! +} + +"""An edge in a connection.""" +type AgentResourceSlotEdge { + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: AgentResourceSlot! +} + +"""Added in 26.3.0. Filter criteria for querying agent resource slots.""" +input AgentResourceSlotFilter { + slotName: StringFilter = null +} + +"""Added in 26.3.0. 
Ordering specification for agent resource slots.""" +input AgentResourceSlotOrderBy { + field: AgentResourceSlotOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering agent resource slots.""" +enum AgentResourceSlotOrderField { + SLOT_NAME + CAPACITY + USED +} + """Added in 25.15.0""" type AgentStats { """Added in 25.15.0""" @@ -321,6 +378,9 @@ type AgentV2 implements Node { Added in 26.3.0. List of sessions running on this agent with pagination support. """ sessions(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! + + """Added in 26.3.0. Per-slot resource capacity and usage for this agent.""" + resourceSlots(filter: AgentResourceSlotFilter = null, orderBy: [AgentResourceSlotOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): AgentResourceConnection! } """ @@ -3016,6 +3076,57 @@ The `JSON` scalar type represents JSON values as specified by [ECMA-404](https:/ """ scalar JSON @specifiedBy(url: "https://ecma-international.org/wp-content/uploads/ECMA-404_2nd_edition_december_2017.pdf") +""" +Added in 26.3.0. Per-slot resource allocation entry for a kernel. +Represents one row from the resource_allocations table. +""" +type KernelResourceAllocation implements Node { + """The Globally Unique ID of this object""" + id: ID! + + """Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device').""" + slotName: String! + + """Amount of this resource slot originally requested for the kernel.""" + requested: Decimal! + + """Amount currently used. May be null if not yet measured.""" + used: Decimal +} + +"""An edge in a connection.""" +type KernelResourceAllocationEdge { + """A cursor for use in pagination""" + cursor: String! 
+ + """The item at the end of the edge""" + node: KernelResourceAllocation! +} + +""" +Added in 26.3.0. Filter criteria for querying kernel resource allocations. +""" +input KernelResourceAllocationFilter { + slotName: StringFilter = null +} + +""" +Added in 26.3.0. Ordering specification for kernel resource allocations. +""" +input KernelResourceAllocationOrderBy { + field: KernelResourceAllocationOrderField! + direction: OrderDirection! = ASC +} + +""" +Added in 26.3.0. Fields available for ordering kernel resource allocations. +""" +enum KernelResourceAllocationOrderField { + SLOT_NAME + REQUESTED + USED +} + """ Added in 26.2.0. Represents a kernel (compute container) in Backend.AI. """ @@ -3061,6 +3172,9 @@ type KernelV2 implements Node { """Added in 26.3.0. The session this kernel belongs to.""" session: SessionV2 + + """Added in 26.3.0. Per-slot resource allocation for this kernel.""" + resourceAllocations(filter: KernelResourceAllocationFilter = null, orderBy: [KernelResourceAllocationOrderBy!] = null, first: Int = null, after: String = null, last: Int = null, before: String = null, limit: Int = null, offset: Int = null): ResourceAllocationConnection! } """ @@ -4090,6 +4204,19 @@ enum NotificationRuleType { ENDPOINT_LIFECYCLE_CHANGED } +""" +Added in 26.3.0. Display number format configuration for a resource slot type. +""" +type NumberFormat { + """ + Whether to use binary (1024-based) prefix instead of decimal (1000-based). + """ + binary: Boolean! + + """Number of decimal places to display.""" + roundLength: Int! +} + """Added in 25.14.0""" type ObjectStorage implements Node { """The Globally Unique ID of this object""" @@ -5056,6 +5183,16 @@ type Query { """ adminSessionsV2(filter: SessionV2Filter = null, orderBy: [SessionV2OrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): SessionV2Connection! + """ + Added in 26.3.0. 
Returns a single resource slot type by slot_name, or null. + """ + resourceSlotType(slotName: String!): ResourceSlotType + + """ + Added in 26.3.0. Returns resource slot types with pagination and filtering. + """ + resourceSlotTypes(filter: ResourceSlotTypeFilter = null, orderBy: [ResourceSlotTypeOrderBy!] = null, before: String = null, after: String = null, first: Int = null, last: Int = null, limit: Int = null, offset: Int = null): ResourceSlotTypeConnection! + """ Added in 26.2.0. @@ -5560,6 +5697,18 @@ type ResourceAllocation { used: ResourceSlot } +""" +Added in 26.3.0. Relay-style connection for per-slot kernel resource allocations. +""" +type ResourceAllocationConnection { + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [KernelResourceAllocationEdge!]! + count: Int! +} + type ResourceConfig { """ Added in 26.1.0. Allocated compute resources including CPU, memory, and accelerators. @@ -5834,6 +5983,84 @@ input ResourceSlotInput { entries: [ResourceSlotEntryInput!]! } +""" +Added in 26.3.0. A registered resource slot type describing display metadata +and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). +""" +type ResourceSlotType implements Node { + """The Globally Unique ID of this object""" + id: ID! + + """ + Unique identifier for the resource slot (e.g., 'cpu', 'mem', 'cuda.device'). + """ + slotName: String! + + """Category of the slot type (e.g., 'count', 'bytes', 'unique-count').""" + slotType: String! + + """Human-readable name for display in UIs.""" + displayName: String! + + """Longer description of what this resource slot represents.""" + description: String! + + """ + Unit label used when displaying resource amounts (e.g., 'GiB', 'cores'). + """ + displayUnit: String! + + """Icon identifier for UI rendering (e.g., 'cpu', 'memory', 'gpu').""" + displayIcon: String! 
+ + """Number formatting rules (binary vs decimal prefix, rounding).""" + numberFormat: NumberFormat! + + """Display ordering rank. Lower values appear first.""" + rank: Int! +} + +""" +Added in 26.3.0. Relay-style connection for paginated resource slot types. +""" +type ResourceSlotTypeConnection { + """Pagination data for this connection""" + pageInfo: PageInfo! + + """Contains the nodes in this connection""" + edges: [ResourceSlotTypeEdge!]! + count: Int! +} + +"""An edge in a connection.""" +type ResourceSlotTypeEdge { + """A cursor for use in pagination""" + cursor: String! + + """The item at the end of the edge""" + node: ResourceSlotType! +} + +"""Added in 26.3.0. Filter criteria for querying resource slot types.""" +input ResourceSlotTypeFilter { + slotName: StringFilter = null + slotType: StringFilter = null + displayName: StringFilter = null +} + +"""Added in 26.3.0. Ordering specification for resource slot types.""" +input ResourceSlotTypeOrderBy { + field: ResourceSlotTypeOrderField! + direction: OrderDirection! = ASC +} + +"""Added in 26.3.0. Fields available for ordering resource slot types.""" +enum ResourceSlotTypeOrderField { + SLOT_NAME + RANK + DISPLAY_NAME +} + """ Added in 26.2.0. Resource weight with default indicator. Shows whether this resource type's weight was explicitly set or uses default. 
@strawberry.field(  # type: ignore[misc]
    description="Added in 26.3.0. Per-slot resource capacity and usage for this agent."
)
async def resource_slots(
    self,
    info: Info[StrawberryGQLContext],
    filter: Annotated[
        AgentResourceSlotFilterGQL,
        strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"),
    ]
    | None = None,
    order_by: list[
        Annotated[
            AgentResourceSlotOrderByGQL,
            strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"),
        ]
    ]
    | None = None,
    first: int | None = None,
    after: str | None = None,
    last: int | None = None,
    before: str | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> Annotated[
    AgentResourceConnectionGQL,
    strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"),
]:
    """Resolve the ``resourceSlots`` connection of this agent.

    All filtering, ordering and pagination arguments are forwarded unchanged
    to ``fetch_agent_resources``; the only value contributed by this node is
    the agent id, passed as ``str(self._agent_id)``.

    Returns:
        The connection object produced by ``fetch_agent_resources``.
    """
    # Imported at call time instead of module level.
    # NOTE(review): presumably this avoids an import cycle with the
    # resource_slot package (its types are only referenced lazily above) — confirm.
    from ai.backend.manager.api.gql.resource_slot import fetcher as _slot_fetcher

    owner_agent_id = str(self._agent_id)
    connection = await _slot_fetcher.fetch_agent_resources(
        info=info,
        agent_id=owner_agent_id,
        filter=filter,
        order_by=order_by,
        first=first,
        after=after,
        last=last,
        before=before,
        limit=limit,
        offset=offset,
    )
    return connection
@strawberry.field(  # type: ignore[misc]
    description="Added in 26.3.0. Per-slot resource allocation for this kernel."
)
async def resource_allocations(
    self,
    info: Info[StrawberryGQLContext],
    filter: Annotated[
        KernelResourceAllocationFilterGQL,
        strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"),
    ]
    | None = None,
    order_by: list[
        Annotated[
            KernelResourceAllocationOrderByGQL,
            strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"),
        ]
    ]
    | None = None,
    first: int | None = None,
    after: str | None = None,
    last: int | None = None,
    before: str | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> Annotated[
    ResourceAllocationConnectionGQL,
    strawberry.lazy("ai.backend.manager.api.gql.resource_slot.types"),
]:
    """Resolve the ``resourceAllocations`` connection of this kernel.

    All filtering, ordering and pagination arguments are forwarded unchanged
    to ``fetch_kernel_allocations``; this node only contributes its own id,
    passed as ``str(self.id)``.

    Returns:
        The connection object produced by ``fetch_kernel_allocations``.
    """
    # Imported at call time instead of module level.
    # NOTE(review): presumably this avoids an import cycle with the
    # resource_slot package (its types are only referenced lazily above) — confirm.
    from ai.backend.manager.api.gql.resource_slot import fetcher as _slot_fetcher

    owner_kernel_id = str(self.id)
    connection = await _slot_fetcher.fetch_kernel_allocations(
        info=info,
        kernel_id=owner_kernel_id,
        filter=filter,
        order_by=order_by,
        first=first,
        after=after,
        last=last,
        before=before,
        limit=limit,
        offset=offset,
    )
    return connection
AwaitableOrValue overloads incompatible with async def cls, diff --git a/src/ai/backend/manager/api/gql/resource_slot/__init__.py b/src/ai/backend/manager/api/gql/resource_slot/__init__.py new file mode 100644 index 00000000000..9d48db4f9f8 --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/src/ai/backend/manager/api/gql/resource_slot/fetcher.py b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py new file mode 100644 index 00000000000..ea93c112382 --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/fetcher.py @@ -0,0 +1,323 @@ +"""Fetcher functions for resource slot GQL queries. + +These functions are shared between root queries (resolver.py) and node connection +resolvers (AgentV2GQL.resource_slots, KernelV2GQL.resource_allocations) to avoid +duplicating query logic. +""" + +from __future__ import annotations + +import uuid as _uuid +from functools import lru_cache + +import strawberry +from strawberry import Info + +from ai.backend.manager.api.gql.adapter import PaginationOptions, PaginationSpec +from ai.backend.manager.api.gql.base import encode_cursor +from ai.backend.manager.api.gql.types import StrawberryGQLContext +from ai.backend.manager.data.resource_slot.types import ( + AgentResourceData, + ResourceAllocationData, + ResourceSlotTypeData, +) +from ai.backend.manager.models.resource_slot import ( + AgentResourceRow, + ResourceAllocationRow, + ResourceSlotTypeRow, +) +from ai.backend.manager.repositories.resource_slot.query import ( + AgentResourceQueryConditions, + AgentResourceQueryOrders, + CursorConditions, + QueryOrders, + ResourceAllocationQueryConditions, + ResourceAllocationQueryOrders, +) +from ai.backend.manager.services.resource_slot.actions.get_agent_resource_by_slot import ( + GetAgentResourceBySlotAction, +) +from ai.backend.manager.services.resource_slot.actions.get_kernel_allocation_by_slot import ( + GetKernelAllocationBySlotAction, 
@lru_cache(maxsize=1)
def _get_slot_type_pagination_spec() -> PaginationSpec:
    """Return the pagination spec for resource slot type queries.

    Rows are ordered by ``slot_name`` (ascending when paging forward,
    descending when paging backward) and ``slot_name`` is also the tiebreaker.
    The result is memoized by ``lru_cache``, so the spec object is built on
    the first call only.
    """
    ascending = QueryOrders.slot_name(ascending=True)
    descending = QueryOrders.slot_name(ascending=False)
    spec = PaginationSpec(
        forward_order=ascending,
        backward_order=descending,
        forward_condition_factory=CursorConditions.by_cursor_forward,
        backward_condition_factory=CursorConditions.by_cursor_backward,
        tiebreaker_order=ResourceSlotTypeRow.slot_name.asc(),
    )
    return spec


@lru_cache(maxsize=1)
def _get_agent_resource_pagination_spec() -> PaginationSpec:
    """Return the pagination spec for per-agent resource slot queries.

    Same layout as the slot type spec, but built from the agent resource
    order/condition helpers and ``AgentResourceRow.slot_name``.
    The result is memoized by ``lru_cache``.
    """
    ascending = AgentResourceQueryOrders.slot_name(ascending=True)
    descending = AgentResourceQueryOrders.slot_name(ascending=False)
    spec = PaginationSpec(
        forward_order=ascending,
        backward_order=descending,
        forward_condition_factory=AgentResourceQueryConditions.by_cursor_forward,
        backward_condition_factory=AgentResourceQueryConditions.by_cursor_backward,
        tiebreaker_order=AgentResourceRow.slot_name.asc(),
    )
    return spec


@lru_cache(maxsize=1)
def _get_resource_allocation_pagination_spec() -> PaginationSpec:
    """Return the pagination spec for per-kernel resource allocation queries.

    Same layout as the slot type spec, but built from the resource allocation
    order/condition helpers and ``ResourceAllocationRow.slot_name``.
    The result is memoized by ``lru_cache``.
    """
    ascending = ResourceAllocationQueryOrders.slot_name(ascending=True)
    descending = ResourceAllocationQueryOrders.slot_name(ascending=False)
    spec = PaginationSpec(
        forward_order=ascending,
        backward_order=descending,
        forward_condition_factory=ResourceAllocationQueryConditions.by_cursor_forward,
        backward_condition_factory=ResourceAllocationQueryConditions.by_cursor_backward,
        tiebreaker_order=ResourceAllocationRow.slot_name.asc(),
    )
    return spec
def _build_page_info(
    edges: list[ResourceSlotTypeEdgeGQL]
    | list[AgentResourceSlotEdgeGQL]
    | list[KernelResourceAllocationEdgeGQL],
    has_next_page: bool,
    has_previous_page: bool,
) -> strawberry.relay.PageInfo:
    """Build the Relay ``PageInfo`` shared by every connection in this module.

    Args:
        edges: Edges of the page being returned, in result order.
        has_next_page: Flag reported by the search action result.
        has_previous_page: Flag reported by the search action result.

    Returns:
        A ``PageInfo`` whose start/end cursors are the cursors of the first
        and last edge, or ``None`` for both when the page is empty.
    """
    return strawberry.relay.PageInfo(
        has_next_page=has_next_page,
        has_previous_page=has_previous_page,
        start_cursor=edges[0].cursor if edges else None,
        end_cursor=edges[-1].cursor if edges else None,
    )


async def fetch_resource_slot_types(
    info: Info[StrawberryGQLContext],
    filter: ResourceSlotTypeFilterGQL | None = None,
    order_by: list[ResourceSlotTypeOrderByGQL] | None = None,
    before: str | None = None,
    after: str | None = None,
    first: int | None = None,
    last: int | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> ResourceSlotTypeConnectionGQL:
    """Fetch resource slot types with pagination and filtering.

    Args:
        info: Strawberry resolver info; its context supplies the GQL adapter
            and the resource slot processors.
        filter: Optional filter input forwarded to the querier builder.
        order_by: Optional ordering inputs forwarded to the querier builder.
        before, after, first, last: Cursor pagination arguments, forwarded
            unchanged via ``PaginationOptions``.
        limit, offset: Offset pagination arguments, forwarded unchanged via
            ``PaginationOptions``.

    Returns:
        A connection holding one edge per item returned by the search action,
        the page info flags from that action, and its ``total_count``.
    """
    querier = info.context.gql_adapter.build_querier(
        PaginationOptions(
            first=first,
            after=after,
            last=last,
            before=before,
            limit=limit,
            offset=offset,
        ),
        pagination_spec=_get_slot_type_pagination_spec(),
        filter=filter,
        order_by=order_by,
    )

    action_result = (
        await info.context.processors.resource_slot.search_resource_slot_types.wait_for_complete(
            SearchResourceSlotTypesAction(querier=querier)
        )
    )

    nodes = [ResourceSlotTypeGQL.from_data(data) for data in action_result.items]
    # NOTE(review): the cursor here is derived from node.id, while the sibling
    # fetchers below encode data.slot_name — confirm node.id carries the value
    # the slot_name-based cursor conditions expect.
    edges = [ResourceSlotTypeEdgeGQL(node=node, cursor=encode_cursor(node.id)) for node in nodes]

    return ResourceSlotTypeConnectionGQL(
        edges=edges,
        page_info=_build_page_info(
            edges,
            has_next_page=action_result.has_next_page,
            has_previous_page=action_result.has_previous_page,
        ),
        count=action_result.total_count,
    )


async def fetch_resource_slot_type(
    info: Info[StrawberryGQLContext],
    slot_name: str,
) -> ResourceSlotTypeGQL:
    """Fetch a single resource slot type by slot_name (used by Node resolution and root query).

    Delegates the lookup to ``load_resource_slot_type_data`` so the action
    invocation lives in one place, then wraps the raw data in the GQL type.

    NOTE(review): the root query advertises "or null", but this function never
    returns ``None`` itself; a missing slot presumably surfaces as an exception
    from the underlying action — confirm the intended contract.
    """
    data = await load_resource_slot_type_data(info, slot_name)
    return ResourceSlotTypeGQL.from_data(data)


async def fetch_agent_resources(
    info: Info[StrawberryGQLContext],
    agent_id: str,
    filter: AgentResourceSlotFilterGQL | None = None,
    order_by: list[AgentResourceSlotOrderByGQL] | None = None,
    before: str | None = None,
    after: str | None = None,
    first: int | None = None,
    last: int | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> AgentResourceConnectionGQL:
    """Fetch per-slot resource entries for a given agent with pagination and filtering.

    Args:
        info: Strawberry resolver info; its context supplies the GQL adapter
            and the resource slot processors.
        agent_id: Agent identifier used to build the ``by_agent_id`` base
            condition of the query.
        filter: Optional filter input forwarded to the querier builder.
        order_by: Optional ordering inputs forwarded to the querier builder.
        before, after, first, last: Cursor pagination arguments, forwarded
            unchanged via ``PaginationOptions``.
        limit, offset: Offset pagination arguments, forwarded unchanged via
            ``PaginationOptions``.

    Returns:
        A connection with one edge per returned item; each edge cursor is the
        encoded ``slot_name`` of its item.
    """
    querier = info.context.gql_adapter.build_querier(
        PaginationOptions(
            first=first,
            after=after,
            last=last,
            before=before,
            limit=limit,
            offset=offset,
        ),
        pagination_spec=_get_agent_resource_pagination_spec(),
        filter=filter,
        order_by=order_by,
        base_conditions=[AgentResourceQueryConditions.by_agent_id(agent_id)],
    )

    action_result = (
        await info.context.processors.resource_slot.search_agent_resources.wait_for_complete(
            SearchAgentResourcesAction(querier=querier)
        )
    )

    edges = [
        AgentResourceSlotEdgeGQL(
            node=AgentResourceSlotGQL.from_data(data),
            cursor=encode_cursor(data.slot_name),
        )
        for data in action_result.items
    ]

    return AgentResourceConnectionGQL(
        count=action_result.total_count,
        edges=edges,
        page_info=_build_page_info(
            edges,
            has_next_page=action_result.has_next_page,
            has_previous_page=action_result.has_previous_page,
        ),
    )


async def fetch_kernel_allocations(
    info: Info[StrawberryGQLContext],
    kernel_id: str,
    filter: KernelResourceAllocationFilterGQL | None = None,
    order_by: list[KernelResourceAllocationOrderByGQL] | None = None,
    before: str | None = None,
    after: str | None = None,
    first: int | None = None,
    last: int | None = None,
    limit: int | None = None,
    offset: int | None = None,
) -> ResourceAllocationConnectionGQL:
    """Fetch per-slot allocation entries for a kernel with pagination and filtering.

    Args:
        info: Strawberry resolver info; its context supplies the GQL adapter
            and the resource slot processors.
        kernel_id: Kernel identifier as a string; it is parsed with
            ``uuid.UUID`` before building the ``by_kernel_id`` base condition.
        filter: Optional filter input forwarded to the querier builder.
        order_by: Optional ordering inputs forwarded to the querier builder.
        before, after, first, last: Cursor pagination arguments, forwarded
            unchanged via ``PaginationOptions``.
        limit, offset: Offset pagination arguments, forwarded unchanged via
            ``PaginationOptions``.

    Returns:
        A connection with one edge per returned item; each edge cursor is the
        encoded ``slot_name`` of its item.

    Raises:
        ValueError: If ``kernel_id`` is not a valid UUID string.
    """
    querier = info.context.gql_adapter.build_querier(
        PaginationOptions(
            first=first,
            after=after,
            last=last,
            before=before,
            limit=limit,
            offset=offset,
        ),
        pagination_spec=_get_resource_allocation_pagination_spec(),
        filter=filter,
        order_by=order_by,
        base_conditions=[ResourceAllocationQueryConditions.by_kernel_id(_uuid.UUID(kernel_id))],
    )

    action_result = (
        await info.context.processors.resource_slot.search_resource_allocations.wait_for_complete(
            SearchResourceAllocationsAction(querier=querier)
        )
    )

    edges = [
        KernelResourceAllocationEdgeGQL(
            node=KernelResourceAllocationGQL.from_data(data),
            cursor=encode_cursor(data.slot_name),
        )
        for data in action_result.items
    ]

    return ResourceAllocationConnectionGQL(
        count=action_result.total_count,
        edges=edges,
        page_info=_build_page_info(
            edges,
            has_next_page=action_result.has_next_page,
            has_previous_page=action_result.has_previous_page,
        ),
    )
+ + +async def load_resource_slot_type_data( + info: Info[StrawberryGQLContext], + slot_name: str, +) -> ResourceSlotTypeData: + """Load raw ResourceSlotTypeData for a single slot_name (used by Node.resolve_nodes).""" + action_result = ( + await info.context.processors.resource_slot.get_resource_slot_type.wait_for_complete( + GetResourceSlotTypeAction(slot_name=slot_name) + ) + ) + return action_result.item + + +async def load_agent_resource_data( + info: Info[StrawberryGQLContext], + agent_id: str, + slot_name: str, +) -> AgentResourceData: + """Load raw AgentResourceData for a single agent+slot (used by Node.resolve_nodes). + + Raises AgentResourceNotFound if the entry does not exist. + """ + action_result = ( + await info.context.processors.resource_slot.get_agent_resource_by_slot.wait_for_complete( + GetAgentResourceBySlotAction(agent_id=agent_id, slot_name=slot_name) + ) + ) + return action_result.item + + +async def load_kernel_allocation_data( + info: Info[StrawberryGQLContext], + kernel_id_str: str, + slot_name: str, +) -> ResourceAllocationData: + """Load raw ResourceAllocationData for a single kernel+slot (used by Node.resolve_nodes). + + Raises ResourceAllocationNotFound if the entry does not exist. 
+ """ + action_result = ( + await info.context.processors.resource_slot.get_kernel_allocation_by_slot.wait_for_complete( + GetKernelAllocationBySlotAction( + kernel_id=_uuid.UUID(kernel_id_str), slot_name=slot_name + ) + ) + ) + return action_result.item diff --git a/src/ai/backend/manager/api/gql/resource_slot/resolver.py b/src/ai/backend/manager/api/gql/resource_slot/resolver.py new file mode 100644 index 00000000000..c764ae71a7a --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/resolver.py @@ -0,0 +1,53 @@ +"""Root query resolvers for resource slot type queries.""" + +from __future__ import annotations + +import strawberry +from strawberry import Info + +from ai.backend.manager.api.gql.types import StrawberryGQLContext + +from .fetcher import fetch_resource_slot_type, fetch_resource_slot_types +from .types import ( + ResourceSlotTypeConnectionGQL, + ResourceSlotTypeFilterGQL, + ResourceSlotTypeGQL, + ResourceSlotTypeOrderByGQL, +) + + +@strawberry.field( + description="Added in 26.3.0. Returns a single resource slot type by slot_name, or null." +) # type: ignore[misc] +async def resource_slot_type( + info: Info[StrawberryGQLContext], + slot_name: str, +) -> ResourceSlotTypeGQL | None: + return await fetch_resource_slot_type(info, slot_name) + + +@strawberry.field( + description="Added in 26.3.0. Returns resource slot types with pagination and filtering." 
+) # type: ignore[misc] +async def resource_slot_types( + info: Info[StrawberryGQLContext], + filter: ResourceSlotTypeFilterGQL | None = None, + order_by: list[ResourceSlotTypeOrderByGQL] | None = None, + before: str | None = None, + after: str | None = None, + first: int | None = None, + last: int | None = None, + limit: int | None = None, + offset: int | None = None, +) -> ResourceSlotTypeConnectionGQL: + return await fetch_resource_slot_types( + info, + filter=filter, + order_by=order_by, + before=before, + after=after, + first=first, + last=last, + limit=limit, + offset=offset, + ) diff --git a/src/ai/backend/manager/api/gql/resource_slot/types.py b/src/ai/backend/manager/api/gql/resource_slot/types.py new file mode 100644 index 00000000000..52387317162 --- /dev/null +++ b/src/ai/backend/manager/api/gql/resource_slot/types.py @@ -0,0 +1,477 @@ +"""GraphQL types for resource slot management. + +Covers: +- ResourceSlotTypeGQL: Registry node for a known resource slot type (resource_slot_types table) +- AgentResourceSlotGQL: Per-slot capacity/usage on an agent (agent_resources table) +- KernelResourceAllocationGQL: Per-slot allocation for a kernel (resource_allocations table) +""" + +from __future__ import annotations + +from collections.abc import Iterable +from decimal import Decimal +from enum import StrEnum +from typing import Any, Self + +import strawberry +from strawberry import ID, Info +from strawberry.relay import Connection, Edge, Node, NodeID + +from ai.backend.manager.api.gql.base import OrderDirection, StringFilter +from ai.backend.manager.api.gql.types import GQLFilter, GQLOrderBy, StrawberryGQLContext +from ai.backend.manager.api.gql.utils import dedent_strip +from ai.backend.manager.data.resource_slot.types import ( + AgentResourceData, + NumberFormatData, + ResourceAllocationData, + ResourceSlotTypeData, +) +from ai.backend.manager.repositories.base import QueryCondition, QueryOrder +from ai.backend.manager.repositories.resource_slot.query import ( 
+ AgentResourceQueryConditions, + AgentResourceQueryOrders, + QueryConditions, + QueryOrders, + ResourceAllocationQueryConditions, + ResourceAllocationQueryOrders, +) + +# ========== NumberFormat ========== + + +@strawberry.type( + name="NumberFormat", + description="Added in 26.3.0. Display number format configuration for a resource slot type.", +) +class NumberFormatGQL: + binary: bool = strawberry.field( + description="Whether to use binary (1024-based) prefix instead of decimal (1000-based)." + ) + round_length: int = strawberry.field(description="Number of decimal places to display.") + + @classmethod + def from_data(cls, data: NumberFormatData) -> Self: + return cls(binary=data.binary, round_length=data.round_length) + + +# ========== ResourceSlotTypeGQL (Node) ========== + + +@strawberry.type( + name="ResourceSlotType", + description=dedent_strip(""" + Added in 26.3.0. A registered resource slot type describing display metadata + and formatting rules for a specific resource (e.g., cpu, mem, cuda.device). + """), +) +class ResourceSlotTypeGQL(Node): + id: NodeID[str] + slot_name: str = strawberry.field( + description="Unique identifier for the resource slot (e.g., 'cpu', 'mem', 'cuda.device')." + ) + slot_type: str = strawberry.field( + description="Category of the slot type (e.g., 'count', 'bytes', 'unique-count')." + ) + display_name: str = strawberry.field(description="Human-readable name for display in UIs.") + description: str = strawberry.field( + description="Longer description of what this resource slot represents." + ) + display_unit: str = strawberry.field( + description="Unit label used when displaying resource amounts (e.g., 'GiB', 'cores')." + ) + display_icon: str = strawberry.field( + description="Icon identifier for UI rendering (e.g., 'cpu', 'memory', 'gpu')." + ) + number_format: NumberFormatGQL = strawberry.field( + description="Number formatting rules (binary vs decimal prefix, rounding)." 
+ ) + rank: int = strawberry.field(description="Display ordering rank. Lower values appear first.") + + @classmethod + async def resolve_nodes( # type: ignore[override] + cls, + *, + info: Info[StrawberryGQLContext], + node_ids: Iterable[str], + required: bool = False, + ) -> Iterable[Self | None]: + from ai.backend.manager.api.gql.resource_slot.fetcher import load_resource_slot_type_data + from ai.backend.manager.errors.resource_slot import ResourceSlotTypeNotFound + + results: list[Self | None] = [] + for slot_name in node_ids: + try: + data = await load_resource_slot_type_data(info, slot_name) + except ResourceSlotTypeNotFound: + if required: + raise + results.append(None) + else: + results.append(cls.from_data(data)) + return results + + @classmethod + def from_data(cls, data: ResourceSlotTypeData) -> Self: + return cls( + id=ID(data.slot_name), + slot_name=data.slot_name, + slot_type=data.slot_type, + display_name=data.display_name, + description=data.description, + display_unit=data.display_unit, + display_icon=data.display_icon, + number_format=NumberFormatGQL.from_data(data.number_format), + rank=data.rank, + ) + + +ResourceSlotTypeEdgeGQL = Edge[ResourceSlotTypeGQL] + + +@strawberry.type( + name="ResourceSlotTypeConnection", + description="Added in 26.3.0. Relay-style connection for paginated resource slot types.", +) +class ResourceSlotTypeConnectionGQL(Connection[ResourceSlotTypeGQL]): + count: int + + def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.count = count + + +# ========== ResourceSlotType Filter/OrderBy ========== + + +@strawberry.enum( + name="ResourceSlotTypeOrderField", + description="Added in 26.3.0. Fields available for ordering resource slot types.", +) +class ResourceSlotTypeOrderFieldGQL(StrEnum): + SLOT_NAME = "slot_name" + RANK = "rank" + DISPLAY_NAME = "display_name" + + +@strawberry.input( + name="ResourceSlotTypeFilter", + description="Added in 26.3.0. 
Filter criteria for querying resource slot types.", +) +class ResourceSlotTypeFilterGQL(GQLFilter): + slot_name: StringFilter | None = None + slot_type: StringFilter | None = None + display_name: StringFilter | None = None + + def build_conditions(self) -> list[QueryCondition]: + conditions: list[QueryCondition] = [] + if self.slot_name: + condition = self.slot_name.build_query_condition( + contains_factory=QueryConditions.by_slot_name_contains, + equals_factory=QueryConditions.by_slot_name_equals, + starts_with_factory=QueryConditions.by_slot_name_starts_with, + ends_with_factory=QueryConditions.by_slot_name_ends_with, + ) + if condition: + conditions.append(condition) + if self.slot_type: + condition = self.slot_type.build_query_condition( + contains_factory=QueryConditions.by_slot_type_contains, + equals_factory=QueryConditions.by_slot_type_equals, + starts_with_factory=QueryConditions.by_slot_type_starts_with, + ends_with_factory=QueryConditions.by_slot_type_ends_with, + ) + if condition: + conditions.append(condition) + if self.display_name: + condition = self.display_name.build_query_condition( + contains_factory=QueryConditions.by_display_name_contains, + equals_factory=QueryConditions.by_display_name_equals, + starts_with_factory=QueryConditions.by_display_name_starts_with, + ends_with_factory=QueryConditions.by_display_name_ends_with, + ) + if condition: + conditions.append(condition) + return conditions + + +@strawberry.input( + name="ResourceSlotTypeOrderBy", + description="Added in 26.3.0. 
Ordering specification for resource slot types.", +) +class ResourceSlotTypeOrderByGQL(GQLOrderBy): + field: ResourceSlotTypeOrderFieldGQL + direction: OrderDirection = OrderDirection.ASC + + def to_query_order(self) -> QueryOrder: + ascending = self.direction == OrderDirection.ASC + match self.field: + case ResourceSlotTypeOrderFieldGQL.SLOT_NAME: + return QueryOrders.slot_name(ascending) + case ResourceSlotTypeOrderFieldGQL.RANK: + return QueryOrders.rank(ascending) + case ResourceSlotTypeOrderFieldGQL.DISPLAY_NAME: + return QueryOrders.display_name(ascending) + case _: + raise ValueError(f"Unhandled ResourceSlotTypeOrderFieldGQL value: {self.field!r}") + + +# ========== AgentResourceSlotGQL (Node) ========== + + +@strawberry.type( + name="AgentResourceSlot", + description=dedent_strip(""" + Added in 26.3.0. Per-slot resource capacity and usage entry for an agent. + Represents one row from the agent_resources table. + """), +) +class AgentResourceSlotGQL(Node): + """Per-agent, per-slot resource capacity and usage.""" + + id: NodeID[str] + slot_name: str = strawberry.field( + description="Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device')." + ) + capacity: Decimal = strawberry.field( + description="Total hardware resource capacity for this slot on the agent." + ) + used: Decimal = strawberry.field( + description="Amount of this slot currently consumed by running and scheduled sessions." 
+ ) + + @classmethod + async def resolve_nodes( # type: ignore[override] + cls, + *, + info: Info[StrawberryGQLContext], + node_ids: Iterable[str], + required: bool = False, + ) -> Iterable[Self | None]: + # Node ID format: "{agent_id}:{slot_name}" + from ai.backend.manager.api.gql.resource_slot.fetcher import load_agent_resource_data + from ai.backend.manager.errors.resource_slot import AgentResourceNotFound + + results: list[Self | None] = [] + for node_id in node_ids: + agent_id, _, slot_name = node_id.partition(":") + try: + data = await load_agent_resource_data(info, agent_id, slot_name) + except AgentResourceNotFound: + if required: + raise + results.append(None) + else: + results.append(cls.from_data(data)) + return results + + @classmethod + def from_data(cls, data: AgentResourceData) -> Self: + node_id = f"{data.agent_id}:{data.slot_name}" + return cls( + id=ID(node_id), + slot_name=data.slot_name, + capacity=data.capacity, + used=data.used, + ) + + +AgentResourceSlotEdgeGQL = Edge[AgentResourceSlotGQL] + + +@strawberry.type( + name="AgentResourceConnection", + description="Added in 26.3.0. Relay-style connection for per-slot agent resources.", +) +class AgentResourceConnectionGQL(Connection[AgentResourceSlotGQL]): + count: int + + def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.count = count + + +# ========== AgentResourceSlot Filter/OrderBy ========== + + +@strawberry.enum( + name="AgentResourceSlotOrderField", + description="Added in 26.3.0. Fields available for ordering agent resource slots.", +) +class AgentResourceSlotOrderFieldGQL(StrEnum): + SLOT_NAME = "slot_name" + CAPACITY = "capacity" + USED = "used" + + +@strawberry.input( + name="AgentResourceSlotFilter", + description="Added in 26.3.0. 
Filter criteria for querying agent resource slots.", +) +class AgentResourceSlotFilterGQL(GQLFilter): + slot_name: StringFilter | None = None + + def build_conditions(self) -> list[QueryCondition]: + conditions: list[QueryCondition] = [] + if self.slot_name: + condition = self.slot_name.build_query_condition( + contains_factory=AgentResourceQueryConditions.by_slot_name_contains, + equals_factory=AgentResourceQueryConditions.by_slot_name_equals, + starts_with_factory=AgentResourceQueryConditions.by_slot_name_starts_with, + ends_with_factory=AgentResourceQueryConditions.by_slot_name_ends_with, + ) + if condition: + conditions.append(condition) + return conditions + + +@strawberry.input( + name="AgentResourceSlotOrderBy", + description="Added in 26.3.0. Ordering specification for agent resource slots.", +) +class AgentResourceSlotOrderByGQL(GQLOrderBy): + field: AgentResourceSlotOrderFieldGQL + direction: OrderDirection = OrderDirection.ASC + + def to_query_order(self) -> QueryOrder: + ascending = self.direction == OrderDirection.ASC + match self.field: + case AgentResourceSlotOrderFieldGQL.SLOT_NAME: + return AgentResourceQueryOrders.slot_name(ascending) + case AgentResourceSlotOrderFieldGQL.CAPACITY: + return AgentResourceQueryOrders.capacity(ascending) + case AgentResourceSlotOrderFieldGQL.USED: + return AgentResourceQueryOrders.used(ascending) + case _: + raise ValueError(f"Unhandled AgentResourceSlotOrderFieldGQL value: {self.field!r}") + + +# ========== KernelResourceAllocationGQL (Node) ========== + + +@strawberry.type( + name="KernelResourceAllocation", + description=dedent_strip(""" + Added in 26.3.0. Per-slot resource allocation entry for a kernel. + Represents one row from the resource_allocations table. + """), +) +class KernelResourceAllocationGQL(Node): + """Per-kernel, per-slot resource allocation.""" + + id: NodeID[str] + slot_name: str = strawberry.field( + description="Resource slot identifier (e.g., 'cpu', 'mem', 'cuda.device')." 
+ ) + requested: Decimal = strawberry.field( + description="Amount of this resource slot originally requested for the kernel." + ) + used: Decimal | None = strawberry.field( + description="Amount currently used. May be null if not yet measured." + ) + + @classmethod + async def resolve_nodes( # type: ignore[override] + cls, + *, + info: Info[StrawberryGQLContext], + node_ids: Iterable[str], + required: bool = False, + ) -> Iterable[Self | None]: + # Node ID format: "{kernel_id}:{slot_name}" + from ai.backend.manager.api.gql.resource_slot.fetcher import load_kernel_allocation_data + from ai.backend.manager.errors.resource_slot import ResourceAllocationNotFound + + results: list[Self | None] = [] + for node_id in node_ids: + kernel_id_str, _, slot_name = node_id.partition(":") + try: + data = await load_kernel_allocation_data(info, kernel_id_str, slot_name) + except ResourceAllocationNotFound: + if required: + raise + results.append(None) + else: + results.append(cls.from_data(data)) + return results + + @classmethod + def from_data(cls, data: ResourceAllocationData) -> Self: + node_id = f"{data.kernel_id}:{data.slot_name}" + return cls( + id=ID(node_id), + slot_name=data.slot_name, + requested=data.requested, + used=data.used, + ) + + +# ========== KernelResourceAllocation Filter/OrderBy ========== + + +@strawberry.enum( + name="KernelResourceAllocationOrderField", + description="Added in 26.3.0. Fields available for ordering kernel resource allocations.", +) +class KernelResourceAllocationOrderFieldGQL(StrEnum): + SLOT_NAME = "slot_name" + REQUESTED = "requested" + USED = "used" + + +@strawberry.input( + name="KernelResourceAllocationFilter", + description="Added in 26.3.0. 
Filter criteria for querying kernel resource allocations.", +) +class KernelResourceAllocationFilterGQL(GQLFilter): + slot_name: StringFilter | None = None + + def build_conditions(self) -> list[QueryCondition]: + conditions: list[QueryCondition] = [] + if self.slot_name: + condition = self.slot_name.build_query_condition( + contains_factory=ResourceAllocationQueryConditions.by_slot_name_contains, + equals_factory=ResourceAllocationQueryConditions.by_slot_name_equals, + starts_with_factory=ResourceAllocationQueryConditions.by_slot_name_starts_with, + ends_with_factory=ResourceAllocationQueryConditions.by_slot_name_ends_with, + ) + if condition: + conditions.append(condition) + return conditions + + +@strawberry.input( + name="KernelResourceAllocationOrderBy", + description="Added in 26.3.0. Ordering specification for kernel resource allocations.", +) +class KernelResourceAllocationOrderByGQL(GQLOrderBy): + field: KernelResourceAllocationOrderFieldGQL + direction: OrderDirection = OrderDirection.ASC + + def to_query_order(self) -> QueryOrder: + ascending = self.direction == OrderDirection.ASC + match self.field: + case KernelResourceAllocationOrderFieldGQL.SLOT_NAME: + return ResourceAllocationQueryOrders.slot_name(ascending) + case KernelResourceAllocationOrderFieldGQL.REQUESTED: + return ResourceAllocationQueryOrders.requested(ascending) + case KernelResourceAllocationOrderFieldGQL.USED: + return ResourceAllocationQueryOrders.used(ascending) + case _: + raise ValueError( + f"Unhandled KernelResourceAllocationOrderFieldGQL value: {self.field!r}" + ) + + +KernelResourceAllocationEdgeGQL = Edge[KernelResourceAllocationGQL] + + +@strawberry.type( + name="ResourceAllocationConnection", + description="Added in 26.3.0. 
Relay-style connection for per-slot kernel resource allocations.", +) +class ResourceAllocationConnectionGQL(Connection[KernelResourceAllocationGQL]): + count: int + + def __init__(self, *args: Any, count: int, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.count = count diff --git a/src/ai/backend/manager/api/gql/schema.py b/src/ai/backend/manager/api/gql/schema.py index 70852ca2dc6..2c73cfe6d78 100644 --- a/src/ai/backend/manager/api/gql/schema.py +++ b/src/ai/backend/manager/api/gql/schema.py @@ -197,6 +197,7 @@ resource_groups, update_resource_group_fair_share_spec, ) +from .resource_slot.resolver import resource_slot_type, resource_slot_types from .resource_usage import ( admin_domain_usage_buckets, admin_project_usage_buckets, @@ -306,6 +307,8 @@ class Query: admin_images_v2 = admin_images_v2 admin_kernels_v2 = admin_kernels_v2 admin_sessions_v2 = admin_sessions_v2 + resource_slot_type = resource_slot_type + resource_slot_types = resource_slot_types admin_image_aliases = admin_image_aliases # RBAC Admin APIs admin_role = admin_role diff --git a/src/ai/backend/manager/errors/resource_slot.py b/src/ai/backend/manager/errors/resource_slot.py index 36babe58b9d..3bd35f2ede1 100644 --- a/src/ai/backend/manager/errors/resource_slot.py +++ b/src/ai/backend/manager/errors/resource_slot.py @@ -27,6 +27,34 @@ def error_code(self) -> ErrorCode: ) +class AgentResourceNotFound(BackendAIError): + """Raised when an agent resource entry for a given agent+slot is not found.""" + + error_type = "https://api.backend.ai/probs/agent-resource-not-found" + error_title = "Agent resource not found." 
+ + def error_code(self) -> ErrorCode: + return ErrorCode( + domain=ErrorDomain.AGENT, + operation=ErrorOperation.READ, + error_detail=ErrorDetail.NOT_FOUND, + ) + + +class ResourceAllocationNotFound(BackendAIError): + """Raised when a resource allocation entry for a given kernel+slot is not found.""" + + error_type = "https://api.backend.ai/probs/resource-allocation-not-found" + error_title = "Resource allocation not found." + + def error_code(self) -> ErrorCode: + return ErrorCode( + domain=ErrorDomain.KERNEL, + operation=ErrorOperation.READ, + error_detail=ErrorDetail.NOT_FOUND, + ) + + class AgentResourceCapacityExceeded(BackendAIError): """Raised when an agent resource update would exceed the slot capacity.""" diff --git a/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py b/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py index b8a38863be1..065b6569434 100644 --- a/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/resource_slot/db_source/db_source.py @@ -20,6 +20,8 @@ ResourceSlotTypeSearchResult, ) from ai.backend.manager.errors.resource_slot import ( + AgentResourceNotFound, + ResourceAllocationNotFound, ResourceSlotTypeNotFound, ) from ai.backend.manager.models.kernel import KernelRow @@ -135,6 +137,25 @@ async def get_agent_resources(self, agent_id: str) -> list[AgentResourceRow]: result = await db_sess.execute(stmt) return list(result.scalars().all()) + async def get_agent_resource_by_slot(self, agent_id: str, slot_name: str) -> AgentResourceRow: + """Get a single slot capacity/usage row for a specific agent+slot combination. + + Raises: + AgentResourceNotFound: If no entry exists for the given agent and slot. 
+ """ + async with self._db.begin_readonly_session_read_committed() as db_sess: + stmt = sa.select(AgentResourceRow).where( + AgentResourceRow.agent_id == agent_id, + AgentResourceRow.slot_name == slot_name, + ) + result = await db_sess.execute(stmt) + row = result.scalar_one_or_none() + if row is None: + raise AgentResourceNotFound( + f"Agent resource not found for agent='{agent_id}', slot='{slot_name}'." + ) + return row + async def search_agent_resources(self, querier: BatchQuerier) -> AgentResourceSearchResult: # Paginated search across all agent_resources rows. # Caller injects conditions (e.g. by_slot_name, by_agent_id) via querier. @@ -170,6 +191,27 @@ async def get_kernel_allocations(self, kernel_id: uuid.UUID) -> list[ResourceAll result = await db_sess.execute(stmt) return list(result.scalars().all()) + async def get_kernel_allocation_by_slot( + self, kernel_id: uuid.UUID, slot_name: str + ) -> ResourceAllocationRow: + """Get a single allocation row for a specific kernel+slot combination. + + Raises: + ResourceAllocationNotFound: If no entry exists for the given kernel and slot. + """ + async with self._db.begin_readonly_session_read_committed() as db_sess: + stmt = sa.select(ResourceAllocationRow).where( + ResourceAllocationRow.kernel_id == kernel_id, + ResourceAllocationRow.slot_name == slot_name, + ) + result = await db_sess.execute(stmt) + row = result.scalar_one_or_none() + if row is None: + raise ResourceAllocationNotFound( + f"Resource allocation not found for kernel='{kernel_id}', slot='{slot_name}'." 
+ ) + return row + async def search_resource_allocations( self, querier: BatchQuerier ) -> ResourceAllocationSearchResult: diff --git a/src/ai/backend/manager/repositories/resource_slot/query.py b/src/ai/backend/manager/repositories/resource_slot/query.py index d584d685992..6a6a9771e4e 100644 --- a/src/ai/backend/manager/repositories/resource_slot/query.py +++ b/src/ai/backend/manager/repositories/resource_slot/query.py @@ -1,9 +1,15 @@ from __future__ import annotations +import uuid + import sqlalchemy as sa from ai.backend.common.data.filter_specs import StringMatchSpec -from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow +from ai.backend.manager.models.resource_slot import ( + AgentResourceRow, + ResourceAllocationRow, + ResourceSlotTypeRow, +) from ai.backend.manager.repositories.base import QueryCondition, QueryOrder @@ -183,3 +189,213 @@ def display_name(ascending: bool = True) -> QueryOrder: if ascending: return ResourceSlotTypeRow.display_name.asc() return ResourceSlotTypeRow.display_name.desc() + + +class CursorConditions: + @staticmethod + def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: + """Cursor condition for forward pagination (after cursor). slot_name is the primary key.""" + + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceSlotTypeRow.slot_name > cursor_slot_name + + return inner + + @staticmethod + def by_cursor_backward(cursor_slot_name: str) -> QueryCondition: + """Cursor condition for backward pagination (before cursor). 
slot_name is the primary key.""" + + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceSlotTypeRow.slot_name < cursor_slot_name + + return inner + + +class AgentResourceQueryConditions: + @staticmethod + def by_agent_id(agent_id: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return AgentResourceRow.agent_id == agent_id + + return inner + + @staticmethod + def by_slot_name_contains(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = AgentResourceRow.slot_name.ilike(f"%{spec.value}%") + else: + condition = AgentResourceRow.slot_name.like(f"%{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_equals(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = sa.func.lower(AgentResourceRow.slot_name) == spec.value.lower() + else: + condition = AgentResourceRow.slot_name == spec.value + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_starts_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = AgentResourceRow.slot_name.ilike(f"{spec.value}%") + else: + condition = AgentResourceRow.slot_name.like(f"{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_ends_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = AgentResourceRow.slot_name.ilike(f"%{spec.value}") + else: + condition = AgentResourceRow.slot_name.like(f"%{spec.value}") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + 
@staticmethod + def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return AgentResourceRow.slot_name > cursor_slot_name + + return inner + + @staticmethod + def by_cursor_backward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return AgentResourceRow.slot_name < cursor_slot_name + + return inner + + +class AgentResourceQueryOrders: + @staticmethod + def slot_name(ascending: bool = True) -> QueryOrder: + if ascending: + return AgentResourceRow.slot_name.asc() + return AgentResourceRow.slot_name.desc() + + @staticmethod + def capacity(ascending: bool = True) -> QueryOrder: + if ascending: + return AgentResourceRow.capacity.asc() + return AgentResourceRow.capacity.desc() + + @staticmethod + def used(ascending: bool = True) -> QueryOrder: + if ascending: + return AgentResourceRow.used.asc() + return AgentResourceRow.used.desc() + + +class ResourceAllocationQueryConditions: + @staticmethod + def by_kernel_id(kernel_id: uuid.UUID) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceAllocationRow.kernel_id == kernel_id + + return inner + + @staticmethod + def by_slot_name_contains(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = ResourceAllocationRow.slot_name.ilike(f"%{spec.value}%") + else: + condition = ResourceAllocationRow.slot_name.like(f"%{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_equals(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = sa.func.lower(ResourceAllocationRow.slot_name) == spec.value.lower() + else: + condition = ResourceAllocationRow.slot_name == spec.value + if spec.negated: + condition = sa.not_(condition) + return 
condition + + return inner + + @staticmethod + def by_slot_name_starts_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = ResourceAllocationRow.slot_name.ilike(f"{spec.value}%") + else: + condition = ResourceAllocationRow.slot_name.like(f"{spec.value}%") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_slot_name_ends_with(spec: StringMatchSpec) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + if spec.case_insensitive: + condition = ResourceAllocationRow.slot_name.ilike(f"%{spec.value}") + else: + condition = ResourceAllocationRow.slot_name.like(f"%{spec.value}") + if spec.negated: + condition = sa.not_(condition) + return condition + + return inner + + @staticmethod + def by_cursor_forward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceAllocationRow.slot_name > cursor_slot_name + + return inner + + @staticmethod + def by_cursor_backward(cursor_slot_name: str) -> QueryCondition: + def inner() -> sa.sql.expression.ColumnElement[bool]: + return ResourceAllocationRow.slot_name < cursor_slot_name + + return inner + + +class ResourceAllocationQueryOrders: + @staticmethod + def slot_name(ascending: bool = True) -> QueryOrder: + if ascending: + return ResourceAllocationRow.slot_name.asc() + return ResourceAllocationRow.slot_name.desc() + + @staticmethod + def requested(ascending: bool = True) -> QueryOrder: + if ascending: + return ResourceAllocationRow.requested.asc() + return ResourceAllocationRow.requested.desc() + + @staticmethod + def used(ascending: bool = True) -> QueryOrder: + if ascending: + return ResourceAllocationRow.used.asc() + return ResourceAllocationRow.used.desc() diff --git a/src/ai/backend/manager/repositories/resource_slot/repository.py b/src/ai/backend/manager/repositories/resource_slot/repository.py 
index 08499ae8420..c9b445e56a4 100644 --- a/src/ai/backend/manager/repositories/resource_slot/repository.py +++ b/src/ai/backend/manager/repositories/resource_slot/repository.py @@ -89,6 +89,11 @@ async def get_agent_resources(self, agent_id: str) -> list[AgentResourceRow]: """Get all slot capacity/usage rows for a given agent.""" return await self._db_source.get_agent_resources(agent_id) + @resource_slot_repository_resilience.apply() + async def get_agent_resource_by_slot(self, agent_id: str, slot_name: str) -> AgentResourceRow: + """Get a single slot row for one agent+slot combination.""" + return await self._db_source.get_agent_resource_by_slot(agent_id, slot_name) + @resource_slot_repository_resilience.apply() async def search_agent_resources(self, querier: BatchQuerier) -> AgentResourceSearchResult: return await self._db_source.search_agent_resources(querier) @@ -100,6 +105,13 @@ async def get_kernel_allocations(self, kernel_id: uuid.UUID) -> list[ResourceAll """Get all per-slot allocation rows for a given kernel.""" return await self._db_source.get_kernel_allocations(kernel_id) + @resource_slot_repository_resilience.apply() + async def get_kernel_allocation_by_slot( + self, kernel_id: uuid.UUID, slot_name: str + ) -> ResourceAllocationRow: + """Get a single allocation row for one kernel+slot combination.""" + return await self._db_source.get_kernel_allocation_by_slot(kernel_id, slot_name) + @resource_slot_repository_resilience.apply() async def search_resource_allocations( self, querier: BatchQuerier diff --git a/src/ai/backend/manager/services/resource_slot/actions/__init__.py b/src/ai/backend/manager/services/resource_slot/actions/__init__.py index 656154a698d..6124c62e96b 100644 --- a/src/ai/backend/manager/services/resource_slot/actions/__init__.py +++ b/src/ai/backend/manager/services/resource_slot/actions/__init__.py @@ -1,8 +1,16 @@ +from .get_agent_resource_by_slot import ( + GetAgentResourceBySlotAction, + GetAgentResourceBySlotResult, +) from 
.get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .get_domain_resource_overview import ( GetDomainResourceOverviewAction, GetDomainResourceOverviewResult, ) +from .get_kernel_allocation_by_slot import ( + GetKernelAllocationBySlotAction, + GetKernelAllocationBySlotResult, +) from .get_kernel_allocations import GetKernelAllocationsAction, GetKernelAllocationsResult from .get_project_resource_overview import ( GetProjectResourceOverviewAction, @@ -17,10 +25,14 @@ from .search_resource_slot_types import SearchResourceSlotTypesAction, SearchResourceSlotTypesResult __all__ = ( + "GetAgentResourceBySlotAction", + "GetAgentResourceBySlotResult", "GetAgentResourcesAction", "GetAgentResourcesResult", "GetDomainResourceOverviewAction", "GetDomainResourceOverviewResult", + "GetKernelAllocationBySlotAction", + "GetKernelAllocationBySlotResult", "GetKernelAllocationsAction", "GetKernelAllocationsResult", "GetProjectResourceOverviewAction", diff --git a/src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py b/src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py new file mode 100644 index 00000000000..b68626e5366 --- /dev/null +++ b/src/ai/backend/manager/services/resource_slot/actions/get_agent_resource_by_slot.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import EntityType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.resource_slot.types import AgentResourceData + +from .base import ResourceSlotAction + + +@dataclass +class GetAgentResourceBySlotAction(ResourceSlotAction): + agent_id: str + slot_name: str + + @override + @classmethod + def entity_type(cls) -> EntityType: + return EntityType.AGENT_RESOURCE + + @override + @classmethod + def 
operation_type(cls) -> ActionOperationType: + return ActionOperationType.GET + + @override + def entity_id(self) -> str | None: + return f"{self.agent_id}:{self.slot_name}" + + +@dataclass +class GetAgentResourceBySlotResult(BaseActionResult): + item: AgentResourceData + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py b/src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py new file mode 100644 index 00000000000..4c2bf4c5ad6 --- /dev/null +++ b/src/ai/backend/manager/services/resource_slot/actions/get_kernel_allocation_by_slot.py @@ -0,0 +1,41 @@ +from __future__ import annotations + +import uuid +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import EntityType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.resource_slot.types import ResourceAllocationData + +from .base import ResourceSlotAction + + +@dataclass +class GetKernelAllocationBySlotAction(ResourceSlotAction): + kernel_id: uuid.UUID + slot_name: str + + @override + @classmethod + def entity_type(cls) -> EntityType: + return EntityType.RESOURCE_ALLOCATION + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.GET + + @override + def entity_id(self) -> str | None: + return f"{self.kernel_id}:{self.slot_name}" + + +@dataclass +class GetKernelAllocationBySlotResult(BaseActionResult): + item: ResourceAllocationData + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/resource_slot/processors.py b/src/ai/backend/manager/services/resource_slot/processors.py index db836645204..970a4180e24 100644 --- a/src/ai/backend/manager/services/resource_slot/processors.py +++ 
b/src/ai/backend/manager/services/resource_slot/processors.py @@ -7,10 +7,14 @@ from ai.backend.manager.actions.types import AbstractProcessorPackage, ActionSpec from .actions import ( + GetAgentResourceBySlotAction, + GetAgentResourceBySlotResult, GetAgentResourcesAction, GetAgentResourcesResult, GetDomainResourceOverviewAction, GetDomainResourceOverviewResult, + GetKernelAllocationBySlotAction, + GetKernelAllocationBySlotResult, GetKernelAllocationsAction, GetKernelAllocationsResult, GetProjectResourceOverviewAction, @@ -28,7 +32,13 @@ class ResourceSlotProcessors(AbstractProcessorPackage): + get_agent_resource_by_slot: ActionProcessor[ + GetAgentResourceBySlotAction, GetAgentResourceBySlotResult + ] get_agent_resources: ActionProcessor[GetAgentResourcesAction, GetAgentResourcesResult] + get_kernel_allocation_by_slot: ActionProcessor[ + GetKernelAllocationBySlotAction, GetKernelAllocationBySlotResult + ] search_agent_resources: ActionProcessor[SearchAgentResourcesAction, SearchAgentResourcesResult] get_kernel_allocations: ActionProcessor[GetKernelAllocationsAction, GetKernelAllocationsResult] search_resource_allocations: ActionProcessor[ @@ -46,7 +56,13 @@ class ResourceSlotProcessors(AbstractProcessorPackage): ] def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMonitor]) -> None: + self.get_agent_resource_by_slot = ActionProcessor( + service.get_agent_resource_by_slot, action_monitors + ) self.get_agent_resources = ActionProcessor(service.get_agent_resources, action_monitors) + self.get_kernel_allocation_by_slot = ActionProcessor( + service.get_kernel_allocation_by_slot, action_monitors + ) self.search_agent_resources = ActionProcessor( service.search_agent_resources, action_monitors ) @@ -72,7 +88,9 @@ def __init__(self, service: ResourceSlotService, action_monitors: list[ActionMon @override def supported_actions(self) -> list[ActionSpec]: return [ + GetAgentResourceBySlotAction.spec(), GetAgentResourcesAction.spec(), + 
GetKernelAllocationBySlotAction.spec(), SearchAgentResourcesAction.spec(), GetKernelAllocationsAction.spec(), SearchResourceAllocationsAction.spec(), diff --git a/src/ai/backend/manager/services/resource_slot/service.py b/src/ai/backend/manager/services/resource_slot/service.py index bc771877f79..bba72cd1b96 100644 --- a/src/ai/backend/manager/services/resource_slot/service.py +++ b/src/ai/backend/manager/services/resource_slot/service.py @@ -7,13 +7,22 @@ ResourceOccupancy, ResourceSlotTypeData, ) +from ai.backend.manager.models.resource_slot import ResourceSlotTypeRow from ai.backend.manager.repositories.resource_slot.repository import ResourceSlotRepository +from .actions.get_agent_resource_by_slot import ( + GetAgentResourceBySlotAction, + GetAgentResourceBySlotResult, +) from .actions.get_agent_resources import GetAgentResourcesAction, GetAgentResourcesResult from .actions.get_domain_resource_overview import ( GetDomainResourceOverviewAction, GetDomainResourceOverviewResult, ) +from .actions.get_kernel_allocation_by_slot import ( + GetKernelAllocationBySlotAction, + GetKernelAllocationBySlotResult, +) from .actions.get_kernel_allocations import GetKernelAllocationsAction, GetKernelAllocationsResult from .actions.get_project_resource_overview import ( GetProjectResourceOverviewAction, @@ -31,12 +40,41 @@ ) +def _row_to_slot_type_data(row: ResourceSlotTypeRow) -> ResourceSlotTypeData: + return ResourceSlotTypeData( + slot_name=row.slot_name, + slot_type=row.slot_type, + display_name=row.display_name, + description=row.description, + display_unit=row.display_unit, + display_icon=row.display_icon, + number_format=NumberFormatData( + binary=row.number_format.binary, + round_length=row.number_format.round_length, + ), + rank=row.rank, + ) + + class ResourceSlotService: _repository: ResourceSlotRepository def __init__(self, repository: ResourceSlotRepository) -> None: self._repository = repository + async def get_agent_resource_by_slot( + self, action: 
GetAgentResourceBySlotAction + ) -> GetAgentResourceBySlotResult: + row = await self._repository.get_agent_resource_by_slot(action.agent_id, action.slot_name) + return GetAgentResourceBySlotResult( + item=AgentResourceData( + agent_id=row.agent_id, + slot_name=row.slot_name, + capacity=row.capacity, + used=row.used, + ) + ) + async def get_agent_resources(self, action: GetAgentResourcesAction) -> GetAgentResourcesResult: rows = await self._repository.get_agent_resources(action.agent_id) items = [ @@ -61,6 +99,21 @@ async def search_agent_resources( has_previous_page=result.has_previous_page, ) + async def get_kernel_allocation_by_slot( + self, action: GetKernelAllocationBySlotAction + ) -> GetKernelAllocationBySlotResult: + row = await self._repository.get_kernel_allocation_by_slot( + action.kernel_id, action.slot_name + ) + return GetKernelAllocationBySlotResult( + item=ResourceAllocationData( + kernel_id=row.kernel_id, + slot_name=row.slot_name, + requested=row.requested, + used=row.used, + ) + ) + async def get_kernel_allocations( self, action: GetKernelAllocationsAction ) -> GetKernelAllocationsResult: @@ -91,20 +144,7 @@ async def get_resource_slot_type( self, action: GetResourceSlotTypeAction ) -> GetResourceSlotTypeResult: row = await self._repository.get_slot_type(action.slot_name) - item = ResourceSlotTypeData( - slot_name=row.slot_name, - slot_type=row.slot_type, - display_name=row.display_name, - description=row.description, - display_unit=row.display_unit, - display_icon=row.display_icon, - number_format=NumberFormatData( - binary=row.number_format.binary, - round_length=row.number_format.round_length, - ), - rank=row.rank, - ) - return GetResourceSlotTypeResult(item=item) + return GetResourceSlotTypeResult(item=_row_to_slot_type_data(row)) async def search_resource_slot_types( self, action: SearchResourceSlotTypesAction