diff --git a/changes/9568.feature.md b/changes/9568.feature.md
new file mode 100644
index 00000000000..c956a901688
--- /dev/null
+++ b/changes/9568.feature.md
@@ -0,0 +1 @@
+Implement Blue-Green deployment strategy
diff --git a/src/ai/backend/manager/data/deployment/types.py b/src/ai/backend/manager/data/deployment/types.py
index ccee6a29ed1..28a3ff8e050 100644
--- a/src/ai/backend/manager/data/deployment/types.py
+++ b/src/ai/backend/manager/data/deployment/types.py
@@ -405,6 +405,7 @@ class RouteInfo:
     created_at: datetime | None
     revision_id: UUID | None
     traffic_status: RouteTrafficStatus
+    status_updated_at: datetime | None = None
     error_data: dict[str, Any] = field(default_factory=dict)
 
 
diff --git a/src/ai/backend/manager/models/routing/row.py b/src/ai/backend/manager/models/routing/row.py
index 51a9d9c1f9f..37ac464c8ce 100644
--- a/src/ai/backend/manager/models/routing/row.py
+++ b/src/ai/backend/manager/models/routing/row.py
@@ -95,6 +95,12 @@ class RoutingRow(Base):  # type: ignore[misc]
 
     # Revision reference without FK (relationship only)
     revision: Mapped[uuid.UUID | None] = mapped_column("revision", GUID, nullable=True)
+    status_updated_at: Mapped[datetime | None] = mapped_column(
+        "status_updated_at",
+        sa.DateTime(timezone=True),
+        server_default=sa.text("now()"),
+        nullable=True,
+    )
     traffic_status: Mapped[RouteTrafficStatus] = mapped_column(
         "traffic_status",
         EnumValueType(RouteTrafficStatus),
@@ -255,5 +261,6 @@ def to_route_info(self) -> RouteInfo:
             created_at=self.created_at,
             revision_id=self.revision,
             traffic_status=self.traffic_status,
+            status_updated_at=self.status_updated_at,
             error_data=self.error_data or {},
         )
diff --git a/src/ai/backend/manager/repositories/deployment/creators/route.py b/src/ai/backend/manager/repositories/deployment/creators/route.py
index 2b313d7c172..254b6c087f4 100644
--- a/src/ai/backend/manager/repositories/deployment/creators/route.py
+++ b/src/ai/backend/manager/repositories/deployment/creators/route.py
@@ -4,6 +4,7 @@
 
 import uuid
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from typing import Any, override
 
 from ai.backend.manager.data.deployment.types import RouteStatus, RouteTrafficStatus
@@ -66,6 +67,7 @@ def build_values(self) -> dict[str, Any]:
         values: dict[str, Any] = {}
         if self.status is not None:
             values["status"] = self.status
+            values["status_updated_at"] = datetime.now(UTC)
         if self.traffic_ratio is not None:
             values["traffic_ratio"] = self.traffic_ratio
         if self.traffic_status is not None:
diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py
index 38aa75d45f2..442b901e5ef 100644
--- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py
+++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py
@@ -1406,12 +1406,39 @@ async def fetch_active_routes_by_endpoint_ids(
                 routes_by_endpoint[row.endpoint].append(row.to_route_info())
             return routes_by_endpoint
 
+    async def fetch_routes_by_endpoint_ids(
+        self,
+        endpoint_ids: set[uuid.UUID],
+    ) -> Mapping[uuid.UUID, list[RouteInfo]]:
+        """Fetch all routes for given endpoint IDs (no status filter).
+
+        Unlike fetch_active_routes_by_endpoint_ids, this includes routes
+        in all statuses (FAILED_TO_START, TERMINATED, etc.), which is
+        required for blue-green rollback detection. Endpoints without any
+        route get no key; an empty ``endpoint_ids`` yields an empty mapping.
+        """
+        if not endpoint_ids:
+            return {}
+
+        async with self._begin_readonly_session_read_committed() as db_sess:
+            query = sa.select(RoutingRow).where(
+                RoutingRow.endpoint.in_(endpoint_ids),
+            )
+            result = await db_sess.execute(query)
+            rows: Sequence[RoutingRow] = result.scalars().all()
+            routes_by_endpoint: defaultdict[uuid.UUID, list[RouteInfo]] = defaultdict(list)
+            for row in rows:
+                # defaultdict(list) creates the per-endpoint list on first access.
+                routes_by_endpoint[row.endpoint].append(row.to_route_info())
+            return routes_by_endpoint
+
     async def scale_routes(
         self,
         scale_out_creators: Sequence[Creator[RoutingRow]],
         scale_in_updater: BatchUpdater[RoutingRow] | None,
+        promote_updater: BatchUpdater[RoutingRow] | None = None,
     ) -> None:
-        """Scale out/in routes based on provided creators and updater."""
+        """Scale out/in/promote routes based on provided creators and updaters."""
         async with self._begin_session_read_committed() as db_sess:
             # Scale out routes
             for creator in scale_out_creators:
@@ -1419,6 +1446,9 @@ async def scale_routes(
             # Scale in routes
             if scale_in_updater:
                 await execute_batch_updater(db_sess, scale_in_updater)
+            # Promote routes (blue-green)
+            if promote_updater:
+                await execute_batch_updater(db_sess, promote_updater)
 
     # Route operations
 
diff --git a/src/ai/backend/manager/repositories/deployment/repository.py b/src/ai/backend/manager/repositories/deployment/repository.py
index d78f05ffe04..89b0aebfd10 100644
--- a/src/ai/backend/manager/repositories/deployment/repository.py
+++ b/src/ai/backend/manager/repositories/deployment/repository.py
@@ -548,13 +548,22 @@ async def fetch_active_routes_by_endpoint_ids(
         """Fetch routes for multiple endpoints."""
         return await self._db_source.fetch_active_routes_by_endpoint_ids(endpoint_ids)
 
+    @deployment_repository_resilience.apply()
+    async def fetch_routes_by_endpoint_ids(
+        self,
+        endpoint_ids: set[uuid.UUID],
+    ) -> Mapping[uuid.UUID, list[RouteInfo]]:
+        """Fetch routes in every status for the given endpoints, grouped by endpoint ID."""
+        return await self._db_source.fetch_routes_by_endpoint_ids(endpoint_ids)
+
     @deployment_repository_resilience.apply()
     async def scale_routes(
         self,
         scale_out_creators: Sequence[Creator[RoutingRow]],
         scale_in_updater: BatchUpdater[RoutingRow] | None,
+        promote_updater: BatchUpdater[RoutingRow] | None = None,
     ) -> None:
-        await self._db_source.scale_routes(scale_out_creators, scale_in_updater)
+        await self._db_source.scale_routes(scale_out_creators, scale_in_updater, promote_updater)
 
     # Route operations
 
diff --git a/src/ai/backend/manager/sokovan/deployment/coordinator.py b/src/ai/backend/manager/sokovan/deployment/coordinator.py
index d617fda9569..f78f03e74f0 100644
--- a/src/ai/backend/manager/sokovan/deployment/coordinator.py
+++ b/src/ai/backend/manager/sokovan/deployment/coordinator.py
@@ -498,7 +498,7 @@ async def _apply_route_changes(
     ) -> None:
         """Apply aggregated route mutations from the evaluation result."""
         changes = eval_result.route_changes
-        if not changes.rollout_specs and not changes.drain_route_ids:
+        if not changes.rollout_specs and not changes.drain_route_ids and not changes.promote_route_ids:
             return
 
         scale_in_updater: BatchUpdater[RoutingRow] | None = None
@@ -512,11 +512,24 @@ async def _apply_route_changes(
                 conditions=[RouteConditions.by_ids(changes.drain_route_ids)],
             )
 
-        await self._deployment_repository.scale_routes(changes.rollout_specs, scale_in_updater)
+        promote_updater: BatchUpdater[RoutingRow] | None = None
+        if changes.promote_route_ids:
+            promote_updater = BatchUpdater(
+                spec=RouteBatchUpdaterSpec(
+                    traffic_status=RouteTrafficStatus.ACTIVE,
+                    traffic_ratio=1.0,
+                ),
+                conditions=[RouteConditions.by_ids(changes.promote_route_ids)],
+            )
+
+        await self._deployment_repository.scale_routes(
+            changes.rollout_specs, scale_in_updater, promote_updater
+        )
         log.debug(
-            "Applied route changes: {} created, {} terminated",
+            "Applied route changes: {} created, {} terminated, {} promoted",
             len(changes.rollout_specs),
             len(changes.drain_route_ids),
+            len(changes.promote_route_ids),
         )
 
     async def _transition_completed_deployments(
diff --git a/src/ai/backend/manager/sokovan/deployment/strategy/blue_green.py b/src/ai/backend/manager/sokovan/deployment/strategy/blue_green.py
index 6e76625ed8b..a94221c8729 100644
--- a/src/ai/backend/manager/sokovan/deployment/strategy/blue_green.py
+++ b/src/ai/backend/manager/sokovan/deployment/strategy/blue_green.py
@@ -1,20 +1,31 @@
 """Blue-green deployment strategy evaluation for a single deployment cycle (BEP-1049).
 
-Provisions a full set of new-revision routes, validates them, then atomically
-switches traffic from the old revision to the new one.
+Provisions a full set of new-revision routes (INACTIVE), validates them, then
+atomically switches traffic from the old revision to the new one.
 """
 
 from __future__ import annotations
 
+import logging
 from collections.abc import Sequence
+from datetime import UTC, datetime
 
+from ai.backend.logging import BraceStyleAdapter
 from ai.backend.manager.data.deployment.types import (
     DeploymentInfo,
+    DeploymentSubStep,
     RouteInfo,
+    RouteStatus,
+    RouteTrafficStatus,
 )
 from ai.backend.manager.models.deployment_policy import BlueGreenSpec
+from ai.backend.manager.models.routing import RoutingRow
+from ai.backend.manager.repositories.base import Creator
+from ai.backend.manager.repositories.deployment.creators import RouteCreatorSpec
 
-from .types import CycleEvaluationResult
+from .types import CycleEvaluationResult, RouteChanges
+
+log = BraceStyleAdapter(logging.getLogger(__name__))
 
 
 def blue_green_evaluate(
@@ -22,5 +33,162 @@ def blue_green_evaluate(
     routes: Sequence[RouteInfo],
     spec: BlueGreenSpec,
 ) -> CycleEvaluationResult:
-    """Evaluate one cycle of blue-green deployment for a single deployment."""
-    raise NotImplementedError("Blue-green deployment strategy is not yet implemented")
+    """Evaluate one cycle of blue-green deployment for a single deployment.
+
+    FSM flow:
+        1. Classify routes into blue (old) / green (new) by revision_id.
+        2. If no green routes → create all green (INACTIVE) → PROVISIONING.
+        3. If any green PROVISIONING → PROVISIONING (wait).
+        4. If all green failed → scale_in green → ROLLED_BACK.
+        5. If not all green healthy → PROGRESSING (wait).
+        6. If all green healthy + auto_promote=False → PROGRESSING (manual wait).
+        7. If all green healthy + auto_promote=True + delay>0 → PROGRESSING (delay wait).
+        8. If all green healthy + auto_promote=True + delay=0 → promote + completed.
+    """
+    deploying_rev = deployment.deploying_revision_id
+    desired = deployment.replica_spec.target_replica_count
+
+    # ── 1. Classify routes ──
+    blue_active: list[RouteInfo] = []
+    green_provisioning: list[RouteInfo] = []
+    green_healthy: list[RouteInfo] = []
+    green_failed: list[RouteInfo] = []
+
+    for r in routes:
+        is_green = r.revision_id == deploying_rev
+        if not is_green:
+            if r.status.is_active():
+                blue_active.append(r)
+            continue
+
+        if r.status == RouteStatus.PROVISIONING:
+            green_provisioning.append(r)
+        elif r.status == RouteStatus.HEALTHY:
+            green_healthy.append(r)
+        elif r.status in (RouteStatus.FAILED_TO_START, RouteStatus.TERMINATED):
+            green_failed.append(r)
+        elif r.status.is_active():
+            green_healthy.append(r)
+
+    total_green_live = len(green_provisioning) + len(green_healthy)
+
+    # ── 2. No green routes → create all green (INACTIVE) ──
+    if total_green_live == 0 and not green_failed:
+        log.debug(
+            "deployment {}: no green routes — creating {} INACTIVE routes",
+            deployment.id,
+            desired,
+        )
+        route_changes = RouteChanges(
+            rollout_specs=_build_route_creators(deployment, desired),
+        )
+        return CycleEvaluationResult(
+            sub_step=DeploymentSubStep.PROVISIONING,
+            route_changes=route_changes,
+        )
+
+    # ── 3. Green PROVISIONING → wait ──
+    if green_provisioning:
+        log.debug(
+            "deployment {}: {} green routes still provisioning",
+            deployment.id,
+            len(green_provisioning),
+        )
+        return CycleEvaluationResult(sub_step=DeploymentSubStep.PROVISIONING)
+
+    # ── 4. All green failed → rollback ──
+    if total_green_live == 0 and green_failed:
+        log.warning(
+            "deployment {}: all {} green routes failed — rolling back",
+            deployment.id,
+            len(green_failed),
+        )
+        route_changes = RouteChanges(
+            drain_route_ids=[r.route_id for r in green_failed],
+        )
+        return CycleEvaluationResult(
+            sub_step=DeploymentSubStep.ROLLED_BACK,
+            route_changes=route_changes,
+        )
+
+    # ── 5. Not all green healthy → PROGRESSING (wait) ──
+    if len(green_healthy) < desired:
+        log.debug(
+            "deployment {}: green healthy={}/{} — waiting",
+            deployment.id,
+            len(green_healthy),
+            desired,
+        )
+        return CycleEvaluationResult(sub_step=DeploymentSubStep.PROGRESSING)
+
+    # ── All green healthy from here ──
+
+    # ── 6. auto_promote=False → PROGRESSING (manual wait) ──
+    if not spec.auto_promote:
+        log.debug(
+            "deployment {}: all green healthy, waiting for manual promotion",
+            deployment.id,
+        )
+        return CycleEvaluationResult(sub_step=DeploymentSubStep.PROGRESSING)
+
+    # ── 7. auto_promote=True + delay>0 → check elapsed time ──
+    if spec.promote_delay_seconds > 0:
+        latest_healthy_at = _latest_status_updated_at(green_healthy)
+        if latest_healthy_at is None:
+            log.debug(
+                "deployment {}: all green healthy but status_updated_at unknown — waiting",
+                deployment.id,
+            )
+            return CycleEvaluationResult(sub_step=DeploymentSubStep.PROGRESSING)
+        elapsed = (datetime.now(UTC) - latest_healthy_at).total_seconds()
+        if elapsed < spec.promote_delay_seconds:
+            log.debug(
+                "deployment {}: promote delay {:.0f}/{} seconds elapsed — waiting",
+                deployment.id,
+                elapsed,
+                spec.promote_delay_seconds,
+            )
+            return CycleEvaluationResult(sub_step=DeploymentSubStep.PROGRESSING)
+
+    # ── 8. Promotion: green → ACTIVE, blue → TERMINATING ──
+    log.info(
+        "deployment {}: promoting {} green routes, terminating {} blue routes",
+        deployment.id,
+        len(green_healthy),
+        len(blue_active),
+    )
+    route_changes = RouteChanges(
+        promote_route_ids=[r.route_id for r in green_healthy],
+        drain_route_ids=[r.route_id for r in blue_active],
+    )
+    return CycleEvaluationResult(
+        sub_step=DeploymentSubStep.PROGRESSING,
+        completed=True,
+        route_changes=route_changes,
+    )
+
+
+def _latest_status_updated_at(routes: list[RouteInfo]) -> datetime | None:
+    """Pick the newest non-None status_updated_at from the routes; None when there is none."""
+    known = (route.status_updated_at for route in routes if route.status_updated_at is not None)
+    return max(known, default=None)
+
+
+def _build_route_creators(
+    deployment: DeploymentInfo,
+    count: int,
+) -> list[Creator[RoutingRow]]:
+    """Return ``count`` creators for green routes (INACTIVE, traffic_ratio=0.0)."""
+
+    def _new_spec() -> RouteCreatorSpec:
+        return RouteCreatorSpec(
+            endpoint_id=deployment.id,
+            session_owner_id=deployment.metadata.session_owner,
+            domain=deployment.metadata.domain,
+            project_id=deployment.metadata.project,
+            revision_id=deployment.deploying_revision_id,
+            traffic_status=RouteTrafficStatus.INACTIVE,
+            traffic_ratio=0.0,
+        )
+
+    return [Creator(spec=_new_spec()) for _ in range(count)]
diff --git a/src/ai/backend/manager/sokovan/deployment/strategy/evaluator.py b/src/ai/backend/manager/sokovan/deployment/strategy/evaluator.py
index 9d84ced2104..88d96e208d3 100644
--- a/src/ai/backend/manager/sokovan/deployment/strategy/evaluator.py
+++ b/src/ai/backend/manager/sokovan/deployment/strategy/evaluator.py
@@ -69,7 +69,7 @@ async def evaluate(
             )
         )
         policy_map = {p.endpoint: p for p in policy_search.items}
-        route_map = await self._deployment_repo.fetch_active_routes_by_endpoint_ids(endpoint_ids)
+        route_map = await self._deployment_repo.fetch_routes_by_endpoint_ids(endpoint_ids)
 
         # ── 2. Per-deployment evaluation ──
         for deployment in deployments:
@@ -92,6 +92,7 @@ async def evaluate(
             changes = cycle_result.route_changes
             result.route_changes.rollout_specs.extend(changes.rollout_specs)
             result.route_changes.drain_route_ids.extend(changes.drain_route_ids)
+            result.route_changes.promote_route_ids.extend(changes.promote_route_ids)
             self._record_route_changes(deployment, changes)
 
             # Group by sub-step
@@ -109,8 +110,8 @@ async def evaluate(
 
     @staticmethod
     def _record_route_changes(deployment: DeploymentInfo, changes: RouteChanges) -> None:
-        """Record rollout/drain operations as sub-steps for observability."""
-        if not changes.rollout_specs and not changes.drain_route_ids:
+        """Record rollout/drain/promote operations as sub-steps for observability."""
+        if not changes.rollout_specs and not changes.drain_route_ids and not changes.promote_route_ids:
             return
         pool = DeploymentRecorderContext.current_pool()
         recorder = pool.recorder(deployment.id)
@@ -127,6 +128,12 @@ def _record_route_changes(deployment: DeploymentInfo, changes: RouteChanges) ->
                     success_detail=f"{len(changes.drain_route_ids)} route(s)",
                 ):
                     pass
+            if changes.promote_route_ids:
+                with recorder.step(
+                    "promote",
+                    success_detail=f"{len(changes.promote_route_ids)} route(s)",
+                ):
+                    pass
 
     def _evaluate_single(
         self,
diff --git a/src/ai/backend/manager/sokovan/deployment/strategy/types.py b/src/ai/backend/manager/sokovan/deployment/strategy/types.py
index 615d6e8238f..8c32682e6a0 100644
--- a/src/ai/backend/manager/sokovan/deployment/strategy/types.py
+++ b/src/ai/backend/manager/sokovan/deployment/strategy/types.py
@@ -20,6 +20,7 @@ class RouteChanges:
 
     rollout_specs: list[Creator[RoutingRow]] = field(default_factory=list)
     drain_route_ids: list[UUID] = field(default_factory=list)
+    promote_route_ids: list[UUID] = field(default_factory=list)
 
 
 @dataclass
diff --git a/tests/unit/manager/sokovan/deployment/strategy/BUILD b/tests/unit/manager/sokovan/deployment/strategy/BUILD
new file mode 100644
index 00000000000..57341b1358b
--- /dev/null
+++ b/tests/unit/manager/sokovan/deployment/strategy/BUILD
@@ -0,0 +1,3 @@
+python_tests(
+    name="tests",
+)
diff --git a/tests/unit/manager/sokovan/deployment/strategy/__init__.py b/tests/unit/manager/sokovan/deployment/strategy/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/unit/manager/sokovan/deployment/strategy/test_blue_green.py b/tests/unit/manager/sokovan/deployment/strategy/test_blue_green.py
new file mode 100644
index 00000000000..3402aff3b22
--- /dev/null
+++ b/tests/unit/manager/sokovan/deployment/strategy/test_blue_green.py
@@ -0,0 +1,1611 @@
+"""Comprehensive tests for the blue-green deployment strategy FSM (BEP-1049).
+
+Tests cover:
+- FSM state transitions: PROVISIONING, PROGRESSING, ROLLED_BACK, completed
+- auto_promote / promote_delay_seconds combinations
+- Single and multi-replica scenarios
+- Edge cases: no routes, all failed, mixed statuses, desired=0
+- Multi-cycle progression simulation
+- Route creator specs validation
+- desired_replica_count vs replica_count
+- Scale-down during blue-green deployment
+- Concurrent provisioning checks
+- Realistic multi-step scenarios
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime, timedelta
+from uuid import UUID, uuid4
+
+from ai.backend.common.data.endpoint.types import EndpointLifecycle
+from ai.backend.common.types import SessionId
+from ai.backend.manager.data.deployment.types import (
+    DeploymentInfo,
+    DeploymentMetadata,
+    DeploymentNetworkSpec,
+    DeploymentState,
+    DeploymentSubStep,
+    ReplicaSpec,
+    RouteInfo,
+    RouteStatus,
+    RouteTrafficStatus,
+)
+from ai.backend.manager.models.deployment_policy import BlueGreenSpec
+from ai.backend.manager.repositories.deployment.creators import RouteCreatorSpec
+from ai.backend.manager.sokovan.deployment.strategy.blue_green import blue_green_evaluate
+from ai.backend.manager.sokovan.deployment.strategy.types import CycleEvaluationResult
+
+ENDPOINT_ID = UUID("aaaaaaaa-0000-0000-0000-aaaaaaaaaaaa")
+OLD_REV = UUID("11111111-1111-1111-1111-111111111111")
+NEW_REV = UUID("22222222-2222-2222-2222-222222222222")
+PROJECT_ID = UUID("cccccccc-cccc-cccc-cccc-cccccccccccc")
+USER_ID = UUID("dddddddd-dddd-dddd-dddd-dddddddddddd")
+
+
+def make_deployment(
+    *,
+    desired: int = 3,
+    deploying_revision_id: UUID = NEW_REV,
+    current_revision_id: UUID = OLD_REV,
+    endpoint_id: UUID = ENDPOINT_ID,
+) -> DeploymentInfo:
+    """Build a DEPLOYING DeploymentInfo fixture whose replica_count is ``desired``."""
+    metadata = DeploymentMetadata(
+        name="test-deploy",
+        domain="default",
+        project=PROJECT_ID,
+        resource_group="default",
+        created_user=USER_ID,
+        session_owner=USER_ID,
+        created_at=datetime.now(UTC),
+        revision_history_limit=5,
+    )
+    return DeploymentInfo(
+        id=endpoint_id,
+        metadata=metadata,
+        state=DeploymentState(
+            lifecycle=EndpointLifecycle.DEPLOYING,
+            retry_count=0,
+        ),
+        replica_spec=ReplicaSpec(replica_count=desired),
+        network=DeploymentNetworkSpec(open_to_public=False),
+        model_revisions=[],
+        current_revision_id=current_revision_id,
+        deploying_revision_id=deploying_revision_id,
+    )
+
+
+def make_route(
+    *,
+    revision_id: UUID,
+    status: RouteStatus = RouteStatus.HEALTHY,
+    endpoint_id: UUID = ENDPOINT_ID,
+    route_id: UUID | None = None,
+    traffic_status: RouteTrafficStatus | None = None,
+    traffic_ratio: float | None = None,
+    status_updated_at: datetime | None = None,
+) -> RouteInfo:
+    """Build a RouteInfo fixture; traffic fields left as None follow ``status.is_active()``."""
+    if traffic_status is None:
+        active = status.is_active()
+        traffic_status = RouteTrafficStatus.ACTIVE if active else RouteTrafficStatus.INACTIVE
+    if traffic_ratio is None:
+        traffic_ratio = 1.0 if status.is_active() else 0.0
+    return RouteInfo(
+        route_id=route_id or uuid4(),
+        endpoint_id=endpoint_id,
+        session_id=SessionId(uuid4()),
+        status=status,
+        traffic_ratio=traffic_ratio,
+        created_at=datetime.now(UTC),
+        revision_id=revision_id,
+        traffic_status=traffic_status,
+        status_updated_at=status_updated_at,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _count_scale_out(result: CycleEvaluationResult) -> int:
+    return len(result.route_changes.rollout_specs)  # number of route creators queued for rollout
+
+
+def _scale_in_ids(result: CycleEvaluationResult) -> list[UUID]:
+    return result.route_changes.drain_route_ids  # IDs of routes queued for draining
+
+
+def _promote_ids(result: CycleEvaluationResult) -> list[UUID]:
+    return result.route_changes.promote_route_ids  # IDs of routes queued for promotion
+
+
+def _blue_routes(count: int, *, status: RouteStatus = RouteStatus.HEALTHY) -> list[RouteInfo]:
+    """Build ``count`` old-revision routes that are ACTIVE with traffic_ratio=1.0."""
+    routes: list[RouteInfo] = []
+    for _ in range(count):
+        # Call make_route per iteration so each route gets its own generated route_id.
+        routes.append(
+            make_route(
+                revision_id=OLD_REV,
+                status=status,
+                traffic_status=RouteTrafficStatus.ACTIVE,
+                traffic_ratio=1.0,
+            )
+        )
+    return routes
+
+
+def _green_routes(
+    count: int,
+    *,
+    status: RouteStatus = RouteStatus.HEALTHY,
+    traffic_status: RouteTrafficStatus = RouteTrafficStatus.INACTIVE,
+    traffic_ratio: float = 0.0,
+    status_updated_at: datetime | None = None,  # same timestamp is applied to every route
+) -> list[RouteInfo]:
+    return [  # ``count`` new-revision routes; defaults are INACTIVE with zero traffic
+        make_route(
+            revision_id=NEW_REV,  # make_deployment's default deploying_revision_id
+            status=status,
+            traffic_status=traffic_status,
+            traffic_ratio=traffic_ratio,
+            status_updated_at=status_updated_at,
+        )
+        for _ in range(count)
+    ]
+
+
+# ===========================================================================
+# 1. Basic FSM states
+# ===========================================================================
+
+
+class TestBasicFSMStates:
+    """Fundamental FSM outcomes: create green, wait, promote, and roll back."""
+
+    def test_no_routes_initial_cycle_creates_green(self) -> None:
+        """First cycle with 0 routes → PROVISIONING; creates `desired` routes, drains none."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        result = blue_green_evaluate(deployment, [], spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+        assert _count_scale_out(result) == 3
+        assert len(_scale_in_ids(result)) == 0
+        assert len(_promote_ids(result)) == 0
+
+    def test_green_provisioning_waits(self) -> None:
+        """Green routes in PROVISIONING → wait (PROVISIONING sub-step), no route changes."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.PROVISIONING)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+        assert _count_scale_out(result) == 0
+        assert len(_scale_in_ids(result)) == 0
+        assert len(_promote_ids(result)) == 0
+
+    def test_completed_when_all_green_healthy_auto_promote(self) -> None:
+        """All green healthy + auto_promote + delay=0 → completed; green promoted, blue drained."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert len(_promote_ids(result)) == 3
+        assert len(_scale_in_ids(result)) == 3
+
+    def test_rollback_when_all_green_failed(self) -> None:
+        """All green routes FAILED_TO_START → ROLLED_BACK, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.FAILED_TO_START)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        assert not result.completed
+
+    def test_rollback_with_terminated_green_routes(self) -> None:
+        """Green routes in TERMINATED also count as failed → ROLLED_BACK, not completed."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(2) + _green_routes(2, status=RouteStatus.TERMINATED)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        assert not result.completed
+
+
+# ===========================================================================
+# 2. auto_promote variations
+# ===========================================================================
+
+
+class TestAutoPromote:
+    """Test auto_promote parameter controls."""
+
+    def test_auto_promote_true_delay_zero_promotes(self) -> None:
+        """auto_promote=True, delay=0 → promote immediately."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 3
+        assert len(_scale_in_ids(result)) == 3
+
+    def test_auto_promote_false_waits_for_manual(self) -> None:
+        """auto_promote=False → PROGRESSING, waiting for manual promotion."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=False, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+        assert len(_promote_ids(result)) == 0
+        assert len(_scale_in_ids(result)) == 0
+
+    def test_auto_promote_true_delay_positive_waits(self) -> None:
+        """auto_promote=True, delay>0 + recently healthy → PROGRESSING (delay wait)."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=60)
+        recently_healthy = datetime.now(UTC) - timedelta(seconds=10)
+        routes = _blue_routes(3) + _green_routes(
+            3, status=RouteStatus.HEALTHY, status_updated_at=recently_healthy
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+        assert len(_promote_ids(result)) == 0
+        assert len(_scale_in_ids(result)) == 0
+
+    def test_auto_promote_false_delay_positive_still_waits(self) -> None:
+        """auto_promote=False, delay>0 → PROGRESSING (manual overrides delay)."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=False, promote_delay_seconds=120)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+        assert len(_promote_ids(result)) == 0
+
+    def test_auto_promote_true_delay_1_second_waits(self) -> None:
+        """auto_promote=True, delay=1 + just became healthy → still waits."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=1)
+        just_now = datetime.now(UTC)
+        routes = _blue_routes(2) + _green_routes(
+            2, status=RouteStatus.HEALTHY, status_updated_at=just_now
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert not result.completed
+        assert len(_promote_ids(result)) == 0
+
+    def test_auto_promote_true_delay_elapsed_promotes(self) -> None:
+        """auto_promote=True, delay=30 + healthy 60s ago → completed, 3 promoted, 3 scaled in."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=30)
+        long_ago = datetime.now(UTC) - timedelta(seconds=60)
+        routes = _blue_routes(3) + _green_routes(
+            3, status=RouteStatus.HEALTHY, status_updated_at=long_ago
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 3
+        assert len(_scale_in_ids(result)) == 3
+
+    def test_auto_promote_delay_no_status_updated_at_waits(self) -> None:
+        """auto_promote=True, delay>0 + status_updated_at=None → PROGRESSING, no promotion."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=30)
+        routes = _blue_routes(2) + _green_routes(
+            2, status=RouteStatus.HEALTHY, status_updated_at=None
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert not result.completed
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert len(_promote_ids(result)) == 0
+
+    def test_auto_promote_delay_uses_latest_timestamp(self) -> None:
+        """Green timestamps 120s and 5s old with a 30s delay → not completed, PROGRESSING."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=30)
+        # One green route healthy 120s ago, the other healthy only 5s ago
+        old_healthy = make_route(
+            revision_id=NEW_REV,
+            status=RouteStatus.HEALTHY,
+            traffic_status=RouteTrafficStatus.INACTIVE,
+            traffic_ratio=0.0,
+            status_updated_at=datetime.now(UTC) - timedelta(seconds=120),
+        )
+        recent_healthy = make_route(
+            revision_id=NEW_REV,
+            status=RouteStatus.HEALTHY,
+            traffic_status=RouteTrafficStatus.INACTIVE,
+            traffic_ratio=0.0,
+            status_updated_at=datetime.now(UTC) - timedelta(seconds=5),
+        )
+        routes = _blue_routes(2) + [old_healthy, recent_healthy]
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        # Most recent timestamp is 5s old, shorter than the 30s delay → keep waiting
+        assert not result.completed
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+
+    def test_default_spec_auto_promote_false(self) -> None:
+        """BlueGreenSpec() defaults + all green healthy → not completed, PROGRESSING."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec()
+        routes = _blue_routes(2) + _green_routes(2, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert not result.completed
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+
+
+# ===========================================================================
+# 3. Provisioning states
+# ===========================================================================
+
+
+class TestProvisioningStates:
+    """Cases expected to yield the PROVISIONING sub-step."""
+
+    def test_all_green_provisioning(self) -> None:
+        """All green routes PROVISIONING → PROVISIONING, not completed, no scale-out."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.PROVISIONING)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+        assert _count_scale_out(result) == 0
+
+    def test_partial_provisioning_partial_healthy(self) -> None:
+        """1 green HEALTHY + 2 green PROVISIONING → PROVISIONING, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = (
+            _blue_routes(3)
+            + _green_routes(1, status=RouteStatus.HEALTHY)
+            + _green_routes(2, status=RouteStatus.PROVISIONING)
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+
+    def test_single_provisioning_among_many_healthy(self) -> None:
+        """4 green HEALTHY + 1 green PROVISIONING (desired=5) → PROVISIONING, not completed."""
+        deployment = make_deployment(desired=5)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = (
+            _blue_routes(5)
+            + _green_routes(4, status=RouteStatus.HEALTHY)
+            + _green_routes(1, status=RouteStatus.PROVISIONING)
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+
+    def test_no_green_with_blue_creates_all(self) -> None:
+        """3 blue routes, no green → PROVISIONING with 3 scale-out specs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(result) == 3
+
+    def test_no_green_no_blue_creates_all(self) -> None:
+        """Empty route list (desired=3) → PROVISIONING with 3 scale-out specs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        result = blue_green_evaluate(deployment, [], spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(result) == 3
+
+
+# ===========================================================================
+# 4. Rollback scenarios
+# ===========================================================================
+
+
+class TestRollbackScenarios:
+    """Cases where every green route has failed or terminated → ROLLED_BACK."""
+
+    def test_all_green_failed_to_start_rollback(self) -> None:
+        """All green FAILED_TO_START → ROLLED_BACK, not completed; scale_in == green IDs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(3, status=RouteStatus.FAILED_TO_START)
+        routes = _blue_routes(3) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        assert not result.completed
+        green_ids = {r.route_id for r in greens}
+        assert set(_scale_in_ids(result)) == green_ids
+        assert len(_promote_ids(result)) == 0
+
+    def test_all_green_terminated_rollback(self) -> None:
+        """All green TERMINATED → ROLLED_BACK; scale_in == green IDs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(3, status=RouteStatus.TERMINATED)
+        routes = _blue_routes(3) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        green_ids = {r.route_id for r in greens}
+        assert set(_scale_in_ids(result)) == green_ids
+
+    def test_mixed_failed_and_terminated_green_rollback(self) -> None:
+        """2 FAILED_TO_START + 2 TERMINATED green → ROLLED_BACK; scale_in == green IDs."""
+        deployment = make_deployment(desired=4)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(2, status=RouteStatus.FAILED_TO_START) + _green_routes(
+            2, status=RouteStatus.TERMINATED
+        )
+        routes = _blue_routes(4) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        green_ids = {r.route_id for r in greens}
+        assert set(_scale_in_ids(result)) == green_ids
+
+    def test_rollback_no_blue_routes(self) -> None:
+        """All green FAILED_TO_START and no blue routes → ROLLED_BACK, not completed."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(2, status=RouteStatus.FAILED_TO_START)
+
+        result = blue_green_evaluate(deployment, greens, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        assert not result.completed
+
+    def test_rollback_preserves_blue_routes(self) -> None:
+        """On rollback, scale_in holds exactly the green IDs and none of the blue IDs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blues = _blue_routes(3)
+        greens = _green_routes(3, status=RouteStatus.FAILED_TO_START)
+        routes = blues + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+        blue_ids = {r.route_id for r in blues}
+        green_ids = {r.route_id for r in greens}
+        assert set(_scale_in_ids(result)) == green_ids
+        assert blue_ids.isdisjoint(set(_scale_in_ids(result)))
+
+
+# ===========================================================================
+# 5. Mixed green statuses (healthy/degraded/unhealthy + failed/terminated, no provisioning)
+# ===========================================================================
+
+
+class TestMixedGreenStatuses:
+    """Green routes in a mix of statuses, none of them PROVISIONING."""
+
+    def test_healthy_and_failed_mixed_progressing(self) -> None:
+        """1 green HEALTHY + 2 FAILED_TO_START (desired=3) → PROGRESSING, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = (
+            _blue_routes(3)
+            + _green_routes(1, status=RouteStatus.HEALTHY)
+            + _green_routes(2, status=RouteStatus.FAILED_TO_START)
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+
+    def test_healthy_and_terminated_mixed_progressing(self) -> None:
+        """2 green HEALTHY + 1 TERMINATED (desired=3) → PROGRESSING, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = (
+            _blue_routes(3)
+            + _green_routes(2, status=RouteStatus.HEALTHY)
+            + _green_routes(1, status=RouteStatus.TERMINATED)
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+
+    def test_degraded_green_counts_as_healthy(self) -> None:
+        """A DEGRADED green route is promotable: desired=1 → completed, 1 promoted."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(1) + _green_routes(1, status=RouteStatus.DEGRADED)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 1
+
+    def test_unhealthy_green_counts_as_healthy(self) -> None:
+        """An UNHEALTHY green route is promotable: desired=1 → completed, 1 promoted."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(1) + _green_routes(1, status=RouteStatus.UNHEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 1
+
+    def test_mix_degraded_and_healthy_green_promoted(self) -> None:
+        """2 HEALTHY + 1 DEGRADED green (desired=3) → completed; promote IDs == green IDs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(2, status=RouteStatus.HEALTHY) + _green_routes(
+            1, status=RouteStatus.DEGRADED
+        )
+        routes = _blue_routes(3) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        green_ids = {r.route_id for r in greens}
+        assert set(_promote_ids(result)) == green_ids
+
+    def test_mix_unhealthy_and_healthy_green_promoted(self) -> None:
+        """1 HEALTHY + 1 UNHEALTHY green (desired=2) → completed; promote IDs == green IDs."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(1, status=RouteStatus.HEALTHY) + _green_routes(
+            1, status=RouteStatus.UNHEALTHY
+        )
+        routes = _blue_routes(2) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        green_ids = {r.route_id for r in greens}
+        assert set(_promote_ids(result)) == green_ids
+
+
+# ===========================================================================
+# 6. Blue route status variations
+# ===========================================================================
+
+
+class TestBlueRouteStatuses:
+    """How the status of blue (OLD_REV) routes affects scale_in on completion."""
+
+    def test_blue_terminating_not_counted_as_active(self) -> None:
+        """A TERMINATING blue route is kept out of scale_in; the green route is promoted."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(1, status=RouteStatus.HEALTHY)
+        routes = [
+            make_route(
+                revision_id=OLD_REV,
+                status=RouteStatus.TERMINATING,
+                traffic_status=RouteTrafficStatus.INACTIVE,
+                traffic_ratio=0.0,
+            ),
+        ] + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        # The single green is promoted; the TERMINATING blue does not appear in scale_in
+        assert len(_promote_ids(result)) == 1
+        assert len(_scale_in_ids(result)) == 0
+
+    def test_blue_terminated_not_counted(self) -> None:
+        """A TERMINATED blue route is kept out of scale_in; deployment still completes."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(1, status=RouteStatus.HEALTHY)
+        routes = [
+            make_route(
+                revision_id=OLD_REV,
+                status=RouteStatus.TERMINATED,
+                traffic_status=RouteTrafficStatus.INACTIVE,
+                traffic_ratio=0.0,
+            ),
+        ] + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_scale_in_ids(result)) == 0
+
+    def test_blue_failed_not_counted_as_active(self) -> None:
+        """A FAILED_TO_START blue route is kept out of scale_in; deployment still completes."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(1, status=RouteStatus.HEALTHY)
+        routes = [
+            make_route(
+                revision_id=OLD_REV,
+                status=RouteStatus.FAILED_TO_START,
+                traffic_status=RouteTrafficStatus.INACTIVE,
+                traffic_ratio=0.0,
+            ),
+        ] + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_scale_in_ids(result)) == 0
+
+    def test_mixed_blue_statuses_only_active_scale_in(self) -> None:
+        """HEALTHY + TERMINATING blue → scale_in contains only the HEALTHY blue route."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        active_blue = make_route(
+            revision_id=OLD_REV,
+            status=RouteStatus.HEALTHY,
+            traffic_status=RouteTrafficStatus.ACTIVE,
+            traffic_ratio=1.0,
+        )
+        inactive_blue = make_route(
+            revision_id=OLD_REV,
+            status=RouteStatus.TERMINATING,
+            traffic_status=RouteTrafficStatus.INACTIVE,
+            traffic_ratio=0.0,
+        )
+        greens = _green_routes(2, status=RouteStatus.HEALTHY)
+        routes = [active_blue, inactive_blue] + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert _scale_in_ids(result) == [active_blue.route_id]
+
+    def test_blue_degraded_counted_as_active(self) -> None:
+        """A DEGRADED blue route is included in scale_in on completion."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blue = make_route(
+            revision_id=OLD_REV,
+            status=RouteStatus.DEGRADED,
+            traffic_status=RouteTrafficStatus.ACTIVE,
+            traffic_ratio=1.0,
+        )
+        greens = _green_routes(1, status=RouteStatus.HEALTHY)
+        routes = [blue] + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert _scale_in_ids(result) == [blue.route_id]
+
+    def test_blue_unhealthy_counted_as_active(self) -> None:
+        """An UNHEALTHY blue route is included in scale_in on completion."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blue = make_route(
+            revision_id=OLD_REV,
+            status=RouteStatus.UNHEALTHY,
+            traffic_status=RouteTrafficStatus.ACTIVE,
+            traffic_ratio=1.0,
+        )
+        greens = _green_routes(1, status=RouteStatus.HEALTHY)
+        routes = [blue] + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert _scale_in_ids(result) == [blue.route_id]
+
+
+# ===========================================================================
+# 7. Multi-cycle progression
+# ===========================================================================
+
+
+class TestMultiCycleProgression:
+    """Snapshots of successive evaluation cycles, each built and evaluated on its own."""
+
+    def test_cycle_1_no_green_creates_all(self) -> None:
+        """Cycle 1: 3 blue, no green → PROVISIONING with 3 scale-out specs."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(result) == 3
+
+    def test_cycle_2_green_provisioning_waits(self) -> None:
+        """Cycle 2: 3 green PROVISIONING → PROVISIONING, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.PROVISIONING)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+
+    def test_cycle_3_partial_green_healthy_waits(self) -> None:
+        """Cycle 3: 2 green HEALTHY + 1 PROVISIONING → still PROVISIONING."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = (
+            _blue_routes(3)
+            + _green_routes(2, status=RouteStatus.HEALTHY)
+            + _green_routes(1, status=RouteStatus.PROVISIONING)
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+
+    def test_cycle_4_all_green_healthy_promotes(self) -> None:
+        """Cycle 4: 3 green HEALTHY → completed; 3 promoted, 3 scaled in."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 3
+        assert len(_scale_in_ids(result)) == 3
+
+    def test_not_completed_when_green_less_than_desired(self) -> None:
+        """2 green HEALTHY with desired=3 → PROGRESSING, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(2, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+
+
+# ===========================================================================
+# 8. Promotion route ID verification
+# ===========================================================================
+
+
+class TestPromotionRouteIdVerification:
+    """Check the exact route IDs (and their order) in the promote and scale_in lists."""
+
+    def test_promote_ids_match_green_healthy(self) -> None:
+        """Promote IDs equal the green route IDs, compared as ordered lists."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blues = _blue_routes(3)
+        greens = _green_routes(3, status=RouteStatus.HEALTHY)
+        routes = blues + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        expected_promote = [r.route_id for r in greens]
+        assert _promote_ids(result) == expected_promote
+
+    def test_scale_in_ids_match_blue_active(self) -> None:
+        """Scale-in IDs equal the blue route IDs, compared as ordered lists."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blues = _blue_routes(3)
+        greens = _green_routes(3, status=RouteStatus.HEALTHY)
+        routes = blues + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        expected_scale_in = [r.route_id for r in blues]
+        assert _scale_in_ids(result) == expected_scale_in
+
+    def test_no_cross_contamination_between_promote_and_scale_in(self) -> None:
+        """Promote IDs and scale_in IDs share no route ID."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blues = _blue_routes(3)
+        greens = _green_routes(3, status=RouteStatus.HEALTHY)
+        routes = blues + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        promote_set = set(_promote_ids(result))
+        scale_in_set = set(_scale_in_ids(result))
+        assert promote_set.isdisjoint(scale_in_set)
+
+    def test_promote_ids_order_matches_green_order(self) -> None:
+        """Promote IDs keep the order in which the green routes were passed in."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(3, status=RouteStatus.HEALTHY)
+        routes = _blue_routes(3) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert _promote_ids(result) == [r.route_id for r in greens]
+
+
+# ===========================================================================
+# 9. Route creator specs validation
+# ===========================================================================
+
+
+class TestRouteCreatorSpecs:
+    """Check the fields of the RouteCreatorSpec objects produced for scale-out."""
+
+    def test_creator_specs_use_deploying_revision(self) -> None:
+        """The single scale-out spec carries the deploying revision and deployment identity."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        result = blue_green_evaluate(deployment, [], spec)
+
+        assert _count_scale_out(result) == 1
+        creator_spec = result.route_changes.scale_out_specs[0].spec
+        assert isinstance(creator_spec, RouteCreatorSpec)
+        assert creator_spec.revision_id == NEW_REV
+        assert creator_spec.endpoint_id == ENDPOINT_ID
+        assert creator_spec.session_owner_id == USER_ID
+        assert creator_spec.domain == "default"
+        assert creator_spec.project_id == PROJECT_ID
+
+    def test_creator_specs_have_inactive_traffic(self) -> None:
+        """Every scale-out spec has INACTIVE traffic status and a 0.0 traffic ratio."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(2)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        for creator in result.route_changes.scale_out_specs:
+            creator_spec = creator.spec
+            assert isinstance(creator_spec, RouteCreatorSpec)
+            assert creator_spec.traffic_status == RouteTrafficStatus.INACTIVE
+            assert creator_spec.traffic_ratio == 0.0
+
+    def test_multiple_creators_all_correct(self) -> None:
+        """desired=5 → all 5 scale-out specs have the expected revision, endpoint, traffic."""
+        deployment = make_deployment(desired=5)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        result = blue_green_evaluate(deployment, [], spec)
+
+        assert _count_scale_out(result) == 5
+        for creator in result.route_changes.scale_out_specs:
+            creator_spec = creator.spec
+            assert isinstance(creator_spec, RouteCreatorSpec)
+            assert creator_spec.revision_id == NEW_REV
+            assert creator_spec.endpoint_id == ENDPOINT_ID
+            assert creator_spec.traffic_status == RouteTrafficStatus.INACTIVE
+            assert creator_spec.traffic_ratio == 0.0
+
+    def test_creator_specs_different_route_ids(self) -> None:
+        """desired=3 → 3 scale-out specs, each a RouteCreatorSpec for NEW_REV."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        result = blue_green_evaluate(deployment, [], spec)
+
+        assert _count_scale_out(result) == 3
+        # NOTE: route uniqueness is not asserted here; only spec type and revision are
+        for creator in result.route_changes.scale_out_specs:
+            assert isinstance(creator.spec, RouteCreatorSpec)
+            assert creator.spec.revision_id == NEW_REV
+
+
+# ===========================================================================
+# 10. Edge cases
+# ===========================================================================
+
+
+class TestEdgeCases:
+    """Boundary conditions: zero, single and large desired counts, missing revisions."""
+
+    def test_desired_1_single_replica_full_lifecycle(self) -> None:
+        """desired=1: cycle 1 creates 1 green; cycle 2 promotes it and scales in 1 blue."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        # Cycle 1: only blue present → one scale-out spec
+        routes = _blue_routes(1)
+        r1 = blue_green_evaluate(deployment, routes, spec)
+        assert _count_scale_out(r1) == 1
+
+        # Cycle 2: green is HEALTHY → completed with promote and scale_in
+        routes = _blue_routes(1) + _green_routes(1, status=RouteStatus.HEALTHY)
+        r2 = blue_green_evaluate(deployment, routes, spec)
+        assert r2.completed
+        assert len(_promote_ids(r2)) == 1
+        assert len(_scale_in_ids(r2)) == 1
+
+    def test_desired_0_no_routes_no_creation(self) -> None:
+        """desired=0 with no routes → no scale-out specs are produced."""
+        deployment = make_deployment(desired=0)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        result = blue_green_evaluate(deployment, [], spec)
+
+        # Only the scale-out count is checked; sub_step and completed are not asserted
+        assert _count_scale_out(result) == 0
+
+    def test_more_green_healthy_than_desired(self) -> None:
+        """3 green HEALTHY with desired=2 → completed; all 3 green IDs promoted."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(3, status=RouteStatus.HEALTHY)
+        routes = _blue_routes(2) + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        green_ids = {r.route_id for r in greens}
+        assert set(_promote_ids(result)) == green_ids
+
+    def test_only_failed_green_no_blue_rolls_back(self) -> None:
+        """2 green FAILED_TO_START and no blue → ROLLED_BACK."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        greens = _green_routes(2, status=RouteStatus.FAILED_TO_START)
+
+        result = blue_green_evaluate(deployment, greens, spec)
+
+        assert result.sub_step == DeploymentSubStep.ROLLED_BACK
+
+    def test_deploying_rev_none_all_routes_classified_as_blue(self) -> None:
+        """deploying_revision_id=None → no route is green; PROVISIONING with 1 scale-out."""
+        deployment = make_deployment(desired=1, deploying_revision_id=None)  # type: ignore[arg-type]
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = [make_route(revision_id=OLD_REV, status=RouteStatus.HEALTHY)]
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        # No route counts as green here → PROVISIONING with one create
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(result) == 1
+
+    def test_route_without_revision_classified_as_blue(self) -> None:
+        """A route with revision_id=None is not green → 1 scale-out spec is produced."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = [make_route(revision_id=None, status=RouteStatus.HEALTHY)]  # type: ignore[arg-type]
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        # revision_id=None != NEW_REV, so the route is not treated as green
+        assert _count_scale_out(result) == 1
+
+    def test_provisioning_prioritized_over_promotion(self) -> None:
+        """1 HEALTHY + 1 PROVISIONING green with desired=1 → PROVISIONING, not completed."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = (
+            _blue_routes(1)
+            + _green_routes(1, status=RouteStatus.HEALTHY)
+            + _green_routes(1, status=RouteStatus.PROVISIONING)
+        )
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        # Healthy green already meets desired, yet PROVISIONING takes precedence
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert not result.completed
+
+    def test_large_desired_creates_all(self) -> None:
+        """desired=10 with 10 blue and no green → 10 scale-out specs in one evaluation."""
+        deployment = make_deployment(desired=10)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(10)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert _count_scale_out(result) == 10
+
+    def test_large_desired_promotes_all(self) -> None:
+        """desired=10 with 10 green HEALTHY → completed; 10 promoted, 10 scaled in."""
+        deployment = make_deployment(desired=10)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        blues = _blue_routes(10)
+        greens = _green_routes(10, status=RouteStatus.HEALTHY)
+        routes = blues + greens
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 10
+        assert len(_scale_in_ids(result)) == 10
+
+
+# ===========================================================================
+# 11. Realistic multi-step scenario (desired=5)
+# ===========================================================================
+
+
+class TestRealisticScenario:
+    """End-to-end walkthroughs of blue-green rollouts, one evaluation cycle at a time."""
+
+    def test_step_by_step_blue_green_deployment(self) -> None:
+        """Happy path with desired=5: create green, wait while provisioning, then promote."""
+        target = make_deployment(desired=5)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        # Cycle 1 — only 5 blue routes exist, so all 5 green routes are requested.
+        old_routes = _blue_routes(5)
+        first = blue_green_evaluate(target, old_routes, bg_spec)
+
+        assert first.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(first) == 5
+        assert len(_scale_in_ids(first)) == 0
+
+        # Cycle 2 — every green route is still PROVISIONING: wait, request nothing new.
+        all_pending = old_routes + _green_routes(5, status=RouteStatus.PROVISIONING)
+        second = blue_green_evaluate(target, all_pending, bg_spec)
+
+        assert second.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(second) == 0
+
+        # Cycle 3 — 3 green healthy, 2 still PROVISIONING: keep waiting.
+        three_ready = (
+            old_routes
+            + _green_routes(3, status=RouteStatus.HEALTHY)
+            + _green_routes(2, status=RouteStatus.PROVISIONING)
+        )
+        third = blue_green_evaluate(target, three_ready, bg_spec)
+
+        assert third.sub_step == DeploymentSubStep.PROVISIONING
+
+        # Cycle 4 — 4 green healthy, 1 still PROVISIONING: keep waiting.
+        four_ready = (
+            old_routes
+            + _green_routes(4, status=RouteStatus.HEALTHY)
+            + _green_routes(1, status=RouteStatus.PROVISIONING)
+        )
+        fourth = blue_green_evaluate(target, four_ready, bg_spec)
+
+        assert fourth.sub_step == DeploymentSubStep.PROVISIONING
+
+        # Cycle 5 — all 5 green healthy: promote them and retire all 5 blue in one step.
+        new_routes = _green_routes(5, status=RouteStatus.HEALTHY)
+        all_ready = old_routes + new_routes
+        fifth = blue_green_evaluate(target, all_ready, bg_spec)
+
+        assert fifth.completed
+        assert len(_promote_ids(fifth)) == 5
+        assert len(_scale_in_ids(fifth)) == 5
+
+    def test_step_by_step_with_failure_rollback(self) -> None:
+        """Failure path with desired=3: green routes fail to start and are rolled back."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        # Cycle 1 — only 3 blue routes exist, so 3 green routes are requested.
+        old_routes = _blue_routes(3)
+        first = blue_green_evaluate(target, old_routes, bg_spec)
+        assert _count_scale_out(first) == 3
+
+        # Cycle 2 — the 3 green routes are PROVISIONING: wait.
+        all_pending = old_routes + _green_routes(3, status=RouteStatus.PROVISIONING)
+        second = blue_green_evaluate(target, all_pending, bg_spec)
+        assert second.sub_step == DeploymentSubStep.PROVISIONING
+
+        # Cycle 3 — every green route ended in FAILED_TO_START: roll back.
+        broken = _green_routes(3, status=RouteStatus.FAILED_TO_START)
+        expected_removed = {route.route_id for route in broken}
+        third = blue_green_evaluate(target, old_routes + broken, bg_spec)
+
+        assert third.sub_step == DeploymentSubStep.ROLLED_BACK
+        assert not third.completed
+        # The scale-in set is exactly the failed green routes.
+        assert set(_scale_in_ids(third)) == expected_removed
+
+    def test_step_by_step_manual_promotion(self) -> None:
+        """Manual path with desired=3: healthy green routes wait until auto_promote is on."""
+        target = make_deployment(desired=3)
+
+        # Cycle 1 — auto_promote=False does not stop green routes from being requested.
+        manual = BlueGreenSpec(auto_promote=False, promote_delay_seconds=0)
+        old_routes = _blue_routes(3)
+        first = blue_green_evaluate(target, old_routes, manual)
+        assert _count_scale_out(first) == 3
+
+        # Cycle 2 — all green healthy, but auto_promote=False: PROGRESSING, no promotion.
+        all_ready = old_routes + _green_routes(3, status=RouteStatus.HEALTHY)
+        second = blue_green_evaluate(target, all_ready, manual)
+        assert second.sub_step == DeploymentSubStep.PROGRESSING
+        assert not second.completed
+        assert len(_promote_ids(second)) == 0
+
+        # Cycle 3 — same routes, spec switched to auto_promote=True: promotion completes.
+        automatic = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        third = blue_green_evaluate(target, all_ready, automatic)
+        assert third.completed
+        assert len(_promote_ids(third)) == 3
+        assert len(_scale_in_ids(third)) == 3
+
+
+# ===========================================================================
+# 12. desired_replica_count vs replica_count
+# ===========================================================================
+
+
+class TestDesiredReplicaCount:
+    """Which replica figure from ReplicaSpec drives the blue-green evaluation."""
+
+    def test_desired_replica_count_overrides_replica_count(self) -> None:
+        """replica_count=1 with desired_replica_count=3 → three green routes are requested."""
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        target = make_deployment(desired=3)
+        target.replica_spec = ReplicaSpec(
+            replica_count=1,
+            desired_replica_count=3,
+        )
+
+        outcome = blue_green_evaluate(target, [], bg_spec)
+
+        # The evaluator follows desired_replica_count (3), not replica_count (1).
+        assert _count_scale_out(outcome) == 3
+
+    def test_replica_count_used_when_no_desired(self) -> None:
+        """desired_replica_count=None with replica_count=2 → two healthy green complete."""
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        new_routes = _green_routes(2, status=RouteStatus.HEALTHY)
+        target = make_deployment(desired=2)
+        target.replica_spec = ReplicaSpec(
+            replica_count=2,
+            desired_replica_count=None,
+        )
+
+        outcome = blue_green_evaluate(target, new_routes, bg_spec)
+
+        assert outcome.completed
+
+    def test_desired_replica_count_determines_green_creation_count(self) -> None:
+        """Two blue routes, desired_replica_count=5 → five green routes are requested."""
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        old_routes = _blue_routes(2)
+        target = make_deployment(desired=5)
+        target.replica_spec = ReplicaSpec(
+            replica_count=2,
+            desired_replica_count=5,
+        )
+
+        outcome = blue_green_evaluate(target, old_routes, bg_spec)
+
+        assert _count_scale_out(outcome) == 5
+
+
+# ===========================================================================
+# 13. Desired count changes (scale-down / scale-up) during blue-green deployment
+# ===========================================================================
+
+
+class TestScaleDownDuringBlueGreen:
+    """Behavior when the desired count changes while a blue-green rollout is in flight."""
+
+    def test_desired_reduced_fewer_green_needed(self) -> None:
+        """desired lowered to 2: two healthy green routes are enough to promote."""
+        target = make_deployment(desired=2)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        # Three blue routes remain from the earlier desired=3; only two green exist.
+        mixed = _blue_routes(3) + _green_routes(2, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, bg_spec)
+
+        # green_healthy (2) >= desired (2): promote both green, scale in every blue.
+        assert outcome.completed
+        assert len(_promote_ids(outcome)) == 2
+        assert len(_scale_in_ids(outcome)) == 3  # all 3 blue routes terminated
+
+    def test_desired_increased_needs_more_green(self) -> None:
+        """desired raised to 5: three healthy green routes are not enough → PROGRESSING."""
+        target = make_deployment(desired=5)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        mixed = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, bg_spec)
+
+        # green_healthy (3) < desired (5): no promotion yet.
+        assert outcome.sub_step == DeploymentSubStep.PROGRESSING
+        assert not outcome.completed
+
+
+# ===========================================================================
+# 14. No blue routes (fresh deployment)
+# ===========================================================================
+
+
+class TestNoBlueRoutes:
+    """Rollouts that start without any blue routes (fresh deployment)."""
+
+    def test_fresh_deployment_creates_green(self) -> None:
+        """Empty route list → PROVISIONING, one green route requested per desired replica."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        outcome = blue_green_evaluate(target, [], bg_spec)
+
+        assert outcome.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(outcome) == 3
+
+    def test_promotion_no_blue(self) -> None:
+        """Healthy green routes and no blue → every green promoted, nothing scaled in."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        new_routes = _green_routes(3, status=RouteStatus.HEALTHY)
+        expected_promoted = {route.route_id for route in new_routes}
+
+        outcome = blue_green_evaluate(target, new_routes, bg_spec)
+
+        assert outcome.completed
+        assert set(_promote_ids(outcome)) == expected_promoted
+        assert len(_scale_in_ids(outcome)) == 0
+
+    def test_fresh_deployment_all_fail_rollback(self) -> None:
+        """No blue routes and every green route FAILED_TO_START → ROLLED_BACK."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        broken = _green_routes(3, status=RouteStatus.FAILED_TO_START)
+
+        outcome = blue_green_evaluate(target, broken, bg_spec)
+
+        assert outcome.sub_step == DeploymentSubStep.ROLLED_BACK
+
+
+# ===========================================================================
+# 15. Concurrent provisioning checks
+# ===========================================================================
+
+
+class TestConcurrentProvisioningChecks:
+    """Green routes that are still PROVISIONING put every other route change on hold."""
+
+    def test_provisioning_blocks_promotion(self) -> None:
+        """One PROVISIONING green route defers promotion even when enough green are healthy."""
+        target = make_deployment(desired=2)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        old_routes = _blue_routes(2)
+        ready = _green_routes(2, status=RouteStatus.HEALTHY)
+        pending = _green_routes(1, status=RouteStatus.PROVISIONING)
+        mixed = old_routes + ready + pending
+
+        outcome = blue_green_evaluate(target, mixed, bg_spec)
+
+        # Two healthy green would satisfy desired=2, but the pending route comes first:
+        # the evaluator waits and emits neither promotions nor scale-ins.
+        assert outcome.sub_step == DeploymentSubStep.PROVISIONING
+        assert not outcome.completed
+        assert len(_promote_ids(outcome)) == 0
+        assert len(_scale_in_ids(outcome)) == 0
+
+    def test_multiple_provisioning_routes_still_waits(self) -> None:
+        """Three green routes, all PROVISIONING and no blue → PROVISIONING."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        pending = _green_routes(3, status=RouteStatus.PROVISIONING)
+
+        outcome = blue_green_evaluate(target, pending, bg_spec)
+
+        assert outcome.sub_step == DeploymentSubStep.PROVISIONING
+
+    def test_provisioning_blocks_even_with_auto_promote_false(self) -> None:
+        """With auto_promote=False a PROVISIONING green route still yields PROVISIONING."""
+        target = make_deployment(desired=2)
+        manual = BlueGreenSpec(auto_promote=False, promote_delay_seconds=0)
+        old_routes = _blue_routes(2)
+        ready = _green_routes(1, status=RouteStatus.HEALTHY)
+        pending = _green_routes(1, status=RouteStatus.PROVISIONING)
+        mixed = old_routes + ready + pending
+
+        outcome = blue_green_evaluate(target, mixed, manual)
+
+        # The provisioning wait is reported with auto_promote off as well.
+        assert outcome.sub_step == DeploymentSubStep.PROVISIONING
+
+    def test_no_actions_during_provisioning_wait(self) -> None:
+        """While green routes are PROVISIONING the result carries no route changes."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        mixed = _blue_routes(3) + _green_routes(3, status=RouteStatus.PROVISIONING)
+
+        outcome = blue_green_evaluate(target, mixed, bg_spec)
+
+        assert _count_scale_out(outcome) == 0
+        assert len(_scale_in_ids(outcome)) == 0
+        assert len(_promote_ids(outcome)) == 0
+
+
+# ===========================================================================
+# 16. Atomicity of promotion
+# ===========================================================================
+
+
+class TestAtomicPromotion:
+    """Promotion happens in one evaluation: all green promoted, all blue scaled in."""
+
+    def test_promotion_is_all_or_nothing(self) -> None:
+        """A single evaluation promotes all 5 healthy green and scales in all 5 blue."""
+        target = make_deployment(desired=5)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        old_routes = _blue_routes(5)
+        new_routes = _green_routes(5, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, old_routes + new_routes, bg_spec)
+
+        # Promotion and blue teardown arrive together, with nothing new to create.
+        assert outcome.completed
+        assert len(_promote_ids(outcome)) == 5
+        assert len(_scale_in_ids(outcome)) == 5
+        assert _count_scale_out(outcome) == 0
+
+    def test_no_partial_promotion(self) -> None:
+        """Only 2 of 3 desired green are healthy → nothing is promoted or scaled in."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        mixed = _blue_routes(3) + _green_routes(2, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, bg_spec)
+
+        # Below the desired count the healthy green routes are not promoted piecemeal.
+        assert not outcome.completed
+        assert len(_promote_ids(outcome)) == 0
+        assert len(_scale_in_ids(outcome)) == 0
+
+    def test_promotion_with_asymmetric_blue_green_count(self) -> None:
+        """3 blue vs 5 healthy green (desired=5) → 5 promoted, 3 scaled in."""
+        target = make_deployment(desired=5)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        old_routes = _blue_routes(3)
+        new_routes = _green_routes(5, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, old_routes + new_routes, bg_spec)
+
+        # Each count matches the routes present on its own side.
+        assert outcome.completed
+        assert len(_promote_ids(outcome)) == 5
+        assert len(_scale_in_ids(outcome)) == 3
+
+    def test_promotion_with_more_blue_than_green(self) -> None:
+        """5 blue vs 3 healthy green (desired=3) → 3 promoted, 5 scaled in."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        old_routes = _blue_routes(5)
+        new_routes = _green_routes(3, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, old_routes + new_routes, bg_spec)
+
+        # Every blue route is scaled in, even though fewer green routes replace them.
+        assert outcome.completed
+        assert len(_promote_ids(outcome)) == 3
+        assert len(_scale_in_ids(outcome)) == 5
+
+
+# ===========================================================================
+# 17. Idempotency and repeated evaluations
+# ===========================================================================
+
+
+class TestIdempotency:
+    """Evaluating the same inputs twice must yield the same outcome."""
+
+    def test_repeated_provisioning_evaluation(self) -> None:
+        """An unchanged PROVISIONING state evaluated twice reports PROVISIONING both times."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        mixed = _blue_routes(3) + _green_routes(3, status=RouteStatus.PROVISIONING)
+
+        first = blue_green_evaluate(target, mixed, bg_spec)
+        again = blue_green_evaluate(target, mixed, bg_spec)
+
+        assert first.sub_step == again.sub_step == DeploymentSubStep.PROVISIONING
+        assert first.completed == again.completed is False
+
+    def test_repeated_completion_evaluation(self) -> None:
+        """An unchanged promotable state evaluated twice completes both times."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        old_routes = _blue_routes(3)
+        new_routes = _green_routes(3, status=RouteStatus.HEALTHY)
+        mixed = old_routes + new_routes
+
+        first = blue_green_evaluate(target, mixed, bg_spec)
+        again = blue_green_evaluate(target, mixed, bg_spec)
+
+        assert first.completed == again.completed is True
+        assert len(_promote_ids(first)) == len(_promote_ids(again)) == 3
+
+    def test_repeated_rollback_evaluation(self) -> None:
+        """An unchanged all-failed state evaluated twice reports ROLLED_BACK both times."""
+        target = make_deployment(desired=3)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        mixed = _blue_routes(3) + _green_routes(3, status=RouteStatus.FAILED_TO_START)
+
+        first = blue_green_evaluate(target, mixed, bg_spec)
+        again = blue_green_evaluate(target, mixed, bg_spec)
+
+        assert first.sub_step == again.sub_step == DeploymentSubStep.ROLLED_BACK
+
+
+# ===========================================================================
+# 18. Spec parameter boundary values
+# ===========================================================================
+
+
+class TestSpecBoundaryValues:
+    """Edge values of BlueGreenSpec.promote_delay_seconds and auto_promote."""
+
+    def test_promote_delay_zero_promotes(self) -> None:
+        """auto_promote=True with promote_delay_seconds=0 → promotion completes."""
+        target = make_deployment(desired=1)
+        no_delay = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        mixed = _blue_routes(1) + _green_routes(1, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, no_delay)
+
+        assert outcome.completed
+
+    def test_promote_delay_large_waits(self) -> None:
+        """promote_delay_seconds=3600 (one hour) → not completed, PROGRESSING."""
+        target = make_deployment(desired=1)
+        one_hour = BlueGreenSpec(auto_promote=True, promote_delay_seconds=3600)
+        mixed = _blue_routes(1) + _green_routes(1, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, one_hour)
+
+        assert not outcome.completed
+        assert outcome.sub_step == DeploymentSubStep.PROGRESSING
+
+    def test_promote_delay_max_int_waits(self) -> None:
+        """promote_delay_seconds=999999 (a very large delay) → not completed."""
+        target = make_deployment(desired=1)
+        huge_delay = BlueGreenSpec(auto_promote=True, promote_delay_seconds=999999)
+        mixed = _blue_routes(1) + _green_routes(1, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, huge_delay)
+
+        assert not outcome.completed
+
+    def test_promote_delay_irrelevant_when_not_auto(self) -> None:
+        """auto_promote=False with zero delay → still PROGRESSING and not completed."""
+        target = make_deployment(desired=1)
+        manual = BlueGreenSpec(auto_promote=False, promote_delay_seconds=0)
+        mixed = _blue_routes(1) + _green_routes(1, status=RouteStatus.HEALTHY)
+
+        outcome = blue_green_evaluate(target, mixed, manual)
+
+        # Zero delay cannot trigger promotion on its own when auto_promote is off.
+        assert not outcome.completed
+        assert outcome.sub_step == DeploymentSubStep.PROGRESSING
+
+
+# ===========================================================================
+# 19. Green route healthy count vs desired
+# ===========================================================================
+
+
+class TestGreenHealthyVsDesired:
+    """How the number of healthy green routes compares against desired."""
+
+    def test_green_healthy_exactly_desired_promotes(self) -> None:
+        """green_healthy == desired → promotion completes."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(3, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+
+    def test_green_healthy_one_less_than_desired_waits(self) -> None:
+        """green_healthy == desired - 1 → PROGRESSING, not completed."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3) + _green_routes(2, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROGRESSING
+        assert not result.completed
+
+    def test_green_healthy_more_than_desired_promotes(self) -> None:
+        """green_healthy > desired → still completes, promoting every healthy green route."""
+        deployment = make_deployment(desired=2)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(2) + _green_routes(4, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 4
+
+    def test_green_healthy_zero_desired_nonzero_waits(self) -> None:
+        """No green routes at all, desired > 0 → PROVISIONING (green routes get created)."""
+        deployment = make_deployment(desired=3)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        routes = _blue_routes(3)  # no green at all
+
+        result = blue_green_evaluate(deployment, routes, spec)
+
+        assert result.sub_step == DeploymentSubStep.PROVISIONING
+        assert _count_scale_out(result) == 3  # creates green rather than merely waiting
+
+
+# ===========================================================================
+# 20. Multiple deployments with different endpoint IDs
+# ===========================================================================
+
+
+class TestDifferentEndpointIds:
+    """Test that the FSM correctly handles different endpoint IDs."""
+
+    def test_different_endpoint_does_not_interfere(self) -> None:
+        """Two healthy old-revision routes on one endpoint → two green routes are requested."""
+        endpoint = UUID("11111111-0000-0000-0000-000000000001")
+
+        target = make_deployment(desired=2, endpoint_id=endpoint)
+        bg_spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+
+        # Both routes carry the same endpoint id as the deployment.
+        old_routes = [
+            make_route(revision_id=OLD_REV, status=RouteStatus.HEALTHY, endpoint_id=endpoint)
+            for _ in range(2)
+        ]
+
+        outcome = blue_green_evaluate(target, old_routes, bg_spec)
+
+        # No green route is in the list, so both desired replicas are requested.
+        assert _count_scale_out(outcome) == 2
+
+    def test_routes_for_other_endpoint_in_list(self) -> None:
+        """Lone healthy green route, desired=1 → promoted (no other-endpoint route is passed)."""
+        deployment = make_deployment(desired=1)
+        spec = BlueGreenSpec(auto_promote=True, promote_delay_seconds=0)
+        green = _green_routes(1, status=RouteStatus.HEALTHY)
+
+        result = blue_green_evaluate(deployment, green, spec)
+
+        assert result.completed
+        assert len(_promote_ids(result)) == 1