From c79e04efedde55e9a97a65f88a79f6507b47e9f3 Mon Sep 17 00:00:00 2001 From: Sanghun Lee Date: Fri, 7 Jun 2024 14:17:36 +0900 Subject: [PATCH 1/2] enhance: fetch all containers eagerly when sync containers --- src/ai/backend/agent/agent.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/ai/backend/agent/agent.py b/src/ai/backend/agent/agent.py index 3b47fae6b7e..f8740dacdee 100644 --- a/src/ai/backend/agent/agent.py +++ b/src/ai/backend/agent/agent.py @@ -1259,10 +1259,17 @@ async def sync_container_lifecycles(self, interval: float) -> None: own_kernels: dict[KernelId, ContainerId] = {} terminated_kernels = {} + _containers = await self.enumerate_containers(DEAD_STATUS_SET | ACTIVE_STATUS_SET) + async with self.registry_lock: try: # Check if: there are dead containers - for kernel_id, container in await self.enumerate_containers(DEAD_STATUS_SET): + dead_containers = [ + (kid, container) + for kid, container in _containers + if container.status in DEAD_STATUS_SET + ] + for kernel_id, container in dead_containers: if ( kernel_id in self.restarting_kernels or kernel_id in self.terminating_kernels @@ -1281,7 +1288,12 @@ async def sync_container_lifecycles(self, interval: float) -> None: LifecycleEvent.CLEAN, KernelLifecycleEventReason.SELF_TERMINATED, ) - for kernel_id, container in await self.enumerate_containers(ACTIVE_STATUS_SET): + alive_containers = [ + (kid, container) + for kid, container in _containers + if container.status in ACTIVE_STATUS_SET + ] + for kernel_id, container in alive_containers: alive_kernels[kernel_id] = container.id session_id = SessionId(UUID(container.labels["ai.backend.session-id"])) kernel_session_map[kernel_id] = session_id From 2ce05fa9f40a6fd9e7c04596b4c9c8eb091a2fc2 Mon Sep 17 00:00:00 2001 From: Sanghun Lee Date: Fri, 7 Jun 2024 16:56:30 +0900 Subject: [PATCH 2/2] add news fragment --- changes/2263.enhance.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/2263.enhance.md diff 
--git a/changes/2263.enhance.md b/changes/2263.enhance.md new file mode 100644 index 00000000000..9d294a283e7 --- /dev/null +++ b/changes/2263.enhance.md @@ -0,0 +1 @@ +Fetch all containers eagerly when matching the agent's registry to containers.