diff --git a/.flake8 b/.flake8
index 13cb9ba1..b32f0349 100644
--- a/.flake8
+++ b/.flake8
@@ -3,3 +3,4 @@ max-line-length = 120
 # Ignore E402: module-level import not at top of file
 # Ignore W503: line break before binary operator (incompatible with W504)
 ignore = E402,W503
+exclude = .venv
diff --git a/.gitignore b/.gitignore
index 51130c5b..9807bf14 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,9 @@
 *.sln.docstates
 *.env
 
+# Environment files
+.venv/
+
 # User-specific files (MonoDevelop/Xamarin Studio)
 *.userprefs
 
diff --git a/simulator/__init__.py b/simulator/__init__.py
index e69de29b..263309ff 100644
--- a/simulator/__init__.py
+++ b/simulator/__init__.py
@@ -0,0 +1,15 @@
+"""
+Simulator package — provisioning sweeps, multi-request analysis, and plotting
+on top of the model_provisioner allocation policies.
+
+The allocation policy implementations live in ``streamwise/model_provisioner/``.
+"""
+import os
+import sys
+
+# Make model_provisioner importable for simulator modules.
+_STREAMWISE_DIR = os.path.normpath(
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "streamwise")
+)
+if _STREAMWISE_DIR not in sys.path:
+    sys.path.insert(0, _STREAMWISE_DIR)
diff --git a/simulator/actions.py b/simulator/actions.py
index debea677..69af1618 100644
--- a/simulator/actions.py
+++ b/simulator/actions.py
@@ -27,7 +27,7 @@
 from sim_types import Objective
 from sim_types import Policy
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from models import get_model_allocation
 
diff --git a/simulator/auto_model_allocator.py b/simulator/auto_model_allocator.py
index ea0fda61..3ca86cb7 100644
--- a/simulator/auto_model_allocator.py
+++ b/simulator/auto_model_allocator.py
@@ -19,7 +19,7 @@
 from sim_types import GPUType
 from sim_types import Result
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from model_allocator import ModelAllocator
 
@@ -47,7 +47,7 @@ def __init__(
     def _build_allocator(self) -> ModelAllocator:
         """Create concrete allocator based on configured solver."""
         if self.policy.solver == Solver.GREEDY:
-            from greedy import GreedyAllocator
+            from model_provisioner.greedy import GreedyAllocator
             return GreedyAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
@@ -55,7 +55,7 @@ def _build_allocator(self) -> ModelAllocator:
                 policy=self.policy,
             )
         if self.policy.solver == Solver.NAIVE:
-            from naive_baseline import NaiveAllocator
+            from model_provisioner.naive_baseline import NaiveAllocator
             return NaiveAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
@@ -63,7 +63,7 @@ def _build_allocator(self) -> ModelAllocator:
                 policy=self.policy,
             )
         if self.policy.solver in {Solver.GUROBI, Solver.HIGHS}:
-            from milp import MILPAllocator
+            from model_provisioner.milp import MILPAllocator
             return MILPAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
@@ -71,7 +71,7 @@ def _build_allocator(self) -> ModelAllocator:
                 policy=self.policy,
             )
         if self.policy.solver == Solver.HEXGEN:
-            from hexgen import HexGenAllocator
+            from model_provisioner.hexgen import HexGenAllocator
             return HexGenAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
@@ -79,7 +79,7 @@ def _build_allocator(self) -> ModelAllocator:
                 policy=self.policy,
             )
         if self.policy.solver == Solver.HELIX:
-            from helix import HelixAllocator
+            from model_provisioner.helix import HelixAllocator
             return HelixAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
diff --git a/simulator/data_loading.py b/simulator/data_loading.py
index 6ee59ec5..af37e5b8 100644
--- a/simulator/data_loading.py
+++ b/simulator/data_loading.py
@@ -28,15 +28,17 @@
 from constants import POWER_GPU_IDLE
 from constants import POWER_GPU_TDP
 
+_DEFAULT_DATA_DIR = Path(__file__).resolve().parent / "data"
+
 
 def load_latency_data(
-    data_dir: str = "data/",
+    data_dir: str | Path = _DEFAULT_DATA_DIR,
 ) -> LatencyData:
     """
     Load latency and throughput mapping data from CSV files.
 
     Args:
-        data_dir (str): The directory where the CSV files are stored.
+        data_dir: The directory where the CSV files are stored.
     Returns:
         LatencyData: An object containing all loaded latency data.
     """
@@ -107,13 +109,13 @@ def load_latency_data(
 
 
 def load_power_data(
-    data_dir: str = "data/"
+    data_dir: str | Path = _DEFAULT_DATA_DIR
 ) -> PowerData:
     """
     Load power consumption data from CSV files.
 
     Args:
-        data_dir (str): The directory where the CSV files are stored.
+        data_dir: The directory where the CSV files are stored.
     Returns:
         PowerData: An object containing all loaded power consumption data.
     """
@@ -216,7 +218,7 @@ def load_power_data(
 
 
 def load_adaptive_quality_data(
-    data_dir: str,
+    data_dir: str | Path,
     level: QualityLevel,
 ) -> LatencyData:
     """Load latency data for adaptive quality."""
diff --git a/simulator/model_allocator.py b/simulator/model_allocator.py
index ab1c7e39..0f773a51 100644
--- a/simulator/model_allocator.py
+++ b/simulator/model_allocator.py
@@ -27,7 +27,7 @@
 from models import UpscalerModelAllocation
 from models import OthersModelAllocation
 
-from policies import NAIVE_POLICY
+from model_provisioner.policies import NAIVE_POLICY
 
 
 class ModelAllocator(ABC):
diff --git a/simulator/multirequests.py b/simulator/multirequests.py
index 4fee5d55..a8d87a8b 100644
--- a/simulator/multirequests.py
+++ b/simulator/multirequests.py
@@ -18,7 +18,7 @@
 
 from workflows import PODCAST_WORKFLOW
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from auto_model_allocator import AutoModelAllocator
 
diff --git a/simulator/provisioning.py b/simulator/provisioning.py
index 43612b53..dd4f2a89 100644
--- a/simulator/provisioning.py
+++ b/simulator/provisioning.py
@@ -33,7 +33,7 @@
 
 from auto_model_allocator import AutoModelAllocator
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from constants import SECONDS_IN_HOUR
 
diff --git a/streamwise/allocator_bridge.py b/streamwise/allocator_bridge.py
new file mode 100644
index 00000000..b1e610d2
--- /dev/null
+++ b/streamwise/allocator_bridge.py
@@ -0,0 +1,250 @@
+"""
+Bridge between the model provisioner's allocator output and StreamWise pod deployment.
+
+Translates ModelAllocation results (abstract Model enum + GPU counts) into concrete
+container deployment parameters compatible with pod_manager.add_pod().
+"""
+
+from __future__ import annotations
+
+import os
+
+import model_provisioner  # noqa: F401 — adds simulator/ to sys.path
+
+from dataclasses import dataclass
+from typing import Optional
+
+from sim_types import GPUType
+from sim_types import Model
+from sim_types import Result
+
+from auto_model_allocator import AutoModelAllocator
+from data_loading import load_latency_data
+from model_provisioner.policies import STREAMWISE_POLICY
+from workflows import WORKFLOWS
+
+
+# Mapping from simulator Model enum to concrete container names used by pod_manager.
+# Some Model entries map to multiple containers (e.g., OTHERS -> kokoro + yolo).
+MODEL_TO_CONTAINERS: dict[Model, list[str]] = {
+    Model.GEMMA: ["gemma"],
+    Model.FLUX: ["flux"],
+    Model.HF: ["hunyuanframepackf1"],
+    Model.HF_VAE: ["hunyuanframepackvae"],
+    Model.FT: ["fantasytalking"],
+    Model.FT_VAE: [],  # FT_VAE is handled within fantasytalking container
+    Model.UPSCALER: ["realesrgan"],
+    Model.OTHERS: ["kokoro", "yolo"],
+}
+
+# Default CPU/memory/storage for each container when deployed via auto-deploy.
+# Format: (cpu_cores, memory_gib, ephemeral_storage_gib)
+CONTAINER_RESOURCES: dict[str, tuple[int, int, int]] = {
+    "gemma": (16, 192, 64),
+    "flux": (12, 128, 64),
+    "hunyuanframepackf1": (24, 128, 64),
+    "hunyuanframepackvae": (4, 32, 16),
+    "fantasytalking": (12, 192, 64),
+    "realesrgan": (4, 32, 16),
+    "kokoro": (2, 8, 16),
+    "yolo": (4, 8, 16),
+}
+
+# GPU type string used by pod_manager (lowercase)
+GPU_TYPE_TO_POD_STR: dict[GPUType, str] = {
+    GPUType.A100: "a100",
+    GPUType.H100: "h100",
+    GPUType.H200: "h200",
+    GPUType.GB200: "gb200",
+}
+
+# MIG containers: these use a MIG slice instead of a full GPU
+MIG_CONTAINERS: dict[str, str] = {
+    "kokoro": "1g.10gb",
+    "yolo": "1g.10gb",
+    "realesrgan": "1g.10gb",
+}
+
+# Mapping from StreamWise app name to simulator workflow key
+APP_TO_WORKFLOW: dict[str, str] = {
+    "streamcast": "podcast",
+    "streampersona": "slide",
+    "streamchat": "chat",
+    "streamshort": "short",
+    "streammovie": "movie",
+    "streamanimate": "story",
+    "streamlecture": "lecture",
+    "streamdub": "dubbing",
+    "streamedit": "editing",
+}
+
+
+@dataclass
+class DeploymentSpec:
+    """A single container deployment specification."""
+    container_name: str
+    cpu: int
+    memory_gib: int
+    ephemeral_storage_gib: int
+    gpu: int
+    gpu_type: Optional[str]
+    mig_profile: Optional[str]
+
+
+@dataclass
+class DeploymentPlan:
+    """Complete deployment plan produced by the auto-allocator."""
+    specs: list[DeploymentSpec]
+    result: Result
+    workflow_name: str
+    gpu_budget: dict[str, int]
+
+
+def _get_data_dir() -> str:
+    """Get the path to the simulator data directory."""
+    default_path = os.path.join(os.path.dirname(__file__), "..", "simulator", "data")
+    return os.getenv("SIMULATOR_DATA_DIR", default_path)
+
+
+def get_available_workflows() -> list[str]:
+    """Return list of available workflow names for the UI."""
+    return list(APP_TO_WORKFLOW.keys())
+
+
+def get_available_gpu_types() -> list[str]:
+    """Return list of available GPU type strings for the UI."""
+    return [gpu_type.value for gpu_type in GPUType]
+
+
+def run_allocator(
+    gpu_budget: dict[str, int],
+    workflow_name: str,
+) -> DeploymentPlan:
+    """
+    Run the greedy model allocator and return a deployment plan.
+
+    Args:
+        gpu_budget: GPU counts keyed by GPU type string (e.g., {"A100": 8, "H100": 0}).
+        workflow_name: StreamWise app name (e.g., "streamcast").
+
+    Returns:
+        DeploymentPlan with concrete container deployment specs.
+
+    Raises:
+        ValueError: If workflow_name or GPU types are invalid.
+    """
+    # Validate workflow
+    workflow_key = APP_TO_WORKFLOW.get(workflow_name)
+    if workflow_key is None:
+        raise ValueError(
+            f"Unknown workflow '{workflow_name}'. "
+            f"Available: {list(APP_TO_WORKFLOW.keys())}")
+
+    workflow = WORKFLOWS[workflow_key]
+
+    # Parse GPU budget into GPUType enum
+    num_gpus: dict[GPUType, int] = {}
+    for gpu_str, count in gpu_budget.items():
+        try:
+            gpu_type = GPUType(gpu_str)
+        except ValueError:
+            raise ValueError(
+                f"Unknown GPU type '{gpu_str}'. "
+                f"Available: {[g.value for g in GPUType]}")
+        if count > 0:
+            num_gpus[gpu_type] = count
+
+    if not num_gpus or sum(num_gpus.values()) < 8:
+        raise ValueError("Total GPU budget must be at least 8 GPUs.")
+
+    # Load latency data and run allocator
+    data_dir = _get_data_dir()
+    latency_data = load_latency_data(data_dir=data_dir)
+
+    allocator = AutoModelAllocator(
+        workflow=workflow,
+        latency_data=latency_data,
+        policy=STREAMWISE_POLICY,
+    )
+
+    result = allocator.allocate(num_gpus=num_gpus, verbose=False)
+
+    # Convert result to deployment specs
+    specs = result_to_deployment_specs(result)
+
+    return DeploymentPlan(
+        specs=specs,
+        result=result,
+        workflow_name=workflow_name,
+        gpu_budget=gpu_budget,
+    )
+
+
+def result_to_deployment_specs(result: Result) -> list[DeploymentSpec]:
+    """
+    Convert an allocator Result into a list of DeploymentSpec objects.
+
+    Each ModelAllocation with replicas > 0 is mapped to one or more container deployments.
+    """
+    specs: list[DeploymentSpec] = []
+
+    for gpu_type, model_dict in result.models.items():
+        gpu_type_str = GPU_TYPE_TO_POD_STR[gpu_type]
+
+        for model, allocations in model_dict.items():
+            containers = MODEL_TO_CONTAINERS.get(model, [])
+            if not containers:
+                continue
+
+            for allocation in allocations:
+                if allocation.replicas <= 0:
+                    continue
+
+                for container_name in containers:
+                    resources = CONTAINER_RESOURCES.get(container_name, (4, 16, 16))
+                    cpu, memory_gib, ephemeral_storage_gib = resources
+
+                    mig_profile = MIG_CONTAINERS.get(container_name)
+                    gpu_count = allocation.devices if not mig_profile else 1
+
+                    for _ in range(allocation.replicas):
+                        specs.append(DeploymentSpec(
+                            container_name=container_name,
+                            cpu=cpu,
+                            memory_gib=memory_gib,
+                            ephemeral_storage_gib=ephemeral_storage_gib,
+                            gpu=gpu_count,
+                            gpu_type=gpu_type_str,
+                            mig_profile=mig_profile,
+                        ))
+
+    return specs
+
+
+def deployment_plan_to_json(plan: DeploymentPlan) -> dict:
+    """Serialize a DeploymentPlan to a JSON-friendly dict."""
+    return {
+        "workflow_name": plan.workflow_name,
+        "gpu_budget": plan.gpu_budget,
+        "metrics": {
+            "total_time_s": round(plan.result.total_time_s, 2),
+            "ttff_s": round(plan.result.ttff_s, 2),
+            "cost": round(plan.result.cost, 4),
+            "gpus_used": {
+                gpu_type.value: count
+                for gpu_type, count in plan.result.gpus_used.items()
+            },
+        },
+        "specs": [
+            {
+                "container_name": spec.container_name,
+                "cpu": spec.cpu,
+                "memory_gib": spec.memory_gib,
+                "ephemeral_storage_gib": spec.ephemeral_storage_gib,
+                "gpu": spec.gpu,
+                "gpu_type": spec.gpu_type,
+                "mig_profile": spec.mig_profile,
+            }
+            for spec in plan.specs
+        ],
+    }
diff --git a/streamwise/model_provisioner/__init__.py b/streamwise/model_provisioner/__init__.py
new file mode 100644
index 00000000..c79b0cde
--- /dev/null
+++ b/streamwise/model_provisioner/__init__.py
@@ -0,0 +1,15 @@
+"""
+Model Provisioner — allocation policy implementations for GPU resource distribution.
+
+Contains greedy, naive, MILP, HexGen, and Helix allocation strategies.
+The foundation types (sim_types, constants, models, etc.) live in simulator/.
+"""
+import os
+import sys
+
+# Add simulator/ to sys.path so policy files can import foundation modules.
+_SIMULATOR_DIR = os.path.normpath(
+    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..", "simulator")
+)
+if _SIMULATOR_DIR not in sys.path:
+    sys.path.insert(0, _SIMULATOR_DIR)
diff --git a/simulator/greedy.py b/streamwise/model_provisioner/greedy.py
similarity index 99%
rename from simulator/greedy.py
rename to streamwise/model_provisioner/greedy.py
index 459742e5..8c1a1dd0 100644
--- a/simulator/greedy.py
+++ b/streamwise/model_provisioner/greedy.py
@@ -33,9 +33,9 @@
 
 from model_allocator import ModelAllocator
 
-from policies import STREAMWISE_POLICY
-from policies import MAX_ITERATIONS
-from policies import USE_ALL_GPUS
+from .policies import STREAMWISE_POLICY
+from .policies import MAX_ITERATIONS
+from .policies import USE_ALL_GPUS
 
 from actions import gen_actions
 from actions import choose_action
diff --git a/simulator/helix.py b/streamwise/model_provisioner/helix.py
similarity index 99%
rename from simulator/helix.py
rename to streamwise/model_provisioner/helix.py
index 5891538f..e8fededf 100644
--- a/simulator/helix.py
+++ b/streamwise/model_provisioner/helix.py
@@ -43,10 +43,10 @@
 
 from evaluator import evaluate_model_allocation
 
-from milp import MILPAllocator
+from .milp import MILPAllocator
 
-from policies import HELIX_POLICY
-from policies import MAX_DEVICES
+from .policies import HELIX_POLICY
+from .policies import MAX_DEVICES
 
 from constants import DEVICE_OPTIONS
 
diff --git a/simulator/hexgen.py b/streamwise/model_provisioner/hexgen.py
similarity index 99%
rename from simulator/hexgen.py
rename to streamwise/model_provisioner/hexgen.py
index 64c64160..4f37768a 100644
--- a/simulator/hexgen.py
+++ b/streamwise/model_provisioner/hexgen.py
@@ -30,15 +30,15 @@
 from evaluator import calc_used_gpus
 from evaluator import evaluate_model_allocation
 
-from greedy import GreedyAllocator
+from .greedy import GreedyAllocator
 
 from actions import gen_actions
 from actions import choose_action
 from actions import apply_action
 
-from policies import HEXGEN_POLICY
-from policies import MAX_ITERATIONS
-from policies import USE_ALL_GPUS
+from .policies import HEXGEN_POLICY
+from .policies import MAX_ITERATIONS
+from .policies import USE_ALL_GPUS
 
 
 def _get_model_order(workflow: WorkflowConfig) -> list[Model]:
diff --git a/simulator/milp.py b/streamwise/model_provisioner/milp.py
similarity index 99%
rename from simulator/milp.py
rename to streamwise/model_provisioner/milp.py
index 7a84e754..67749258 100644
--- a/simulator/milp.py
+++ b/streamwise/model_provisioner/milp.py
@@ -40,7 +40,7 @@
 from constants import NUM_GPUS_PER_SERVER
 from constants import SECONDS_IN_HOUR
 
-from policies import STREAMWISE_MILP_POLICY
+from .policies import STREAMWISE_MILP_POLICY
 
 
 MAX_INSTANCES = 16
diff --git a/simulator/naive_baseline.py b/streamwise/model_provisioner/naive_baseline.py
similarity index 99%
rename from simulator/naive_baseline.py
rename to streamwise/model_provisioner/naive_baseline.py
index 9f9c550c..ec95904e 100644
--- a/simulator/naive_baseline.py
+++ b/streamwise/model_provisioner/naive_baseline.py
@@ -31,8 +31,8 @@
 
 from evaluator import evaluate_model_allocation
 
-from policies import NAIVE_POLICY
-from policies import MAX_DEVICES
+from .policies import NAIVE_POLICY
+from .policies import MAX_DEVICES
 
 from model_allocator import ModelAllocator
 
diff --git a/simulator/policies.py b/streamwise/model_provisioner/policies.py
similarity index 100%
rename from simulator/policies.py
rename to streamwise/model_provisioner/policies.py
diff --git a/streamwise/streamwise.py b/streamwise/streamwise.py
index 1c63eacf..0ce24ac5 100644
--- a/streamwise/streamwise.py
+++ b/streamwise/streamwise.py
@@ -34,6 +34,7 @@
 import pod_manager
 import node_manager
 import job_manager
+import allocator_bridge
 
 from service_manager import get_services
 from service_manager import get_service_timestamps
@@ -726,6 +727,123 @@ async def api_add_pod() -> QuartReturn:
         return jsonify({"error": str(ex)}), HTTPStatus.INTERNAL_SERVER_ERROR
 
 
+@route("/api/auto_deploy", methods=["POST"])
+async def api_auto_deploy() -> QuartReturn:
+    """Run the model allocator to produce an optimized deployment plan.
+
+    Expects JSON body:
+        {
+            "gpu_budget": {"A100": 8, "H100": 0, ...},
+            "workflow": "streamcast"
+        }
+
+    Returns the deployment plan with estimated metrics and per-container specs.
+    """
+    try:
+        data = await request.get_json()
+        if not data:
+            return jsonify({"error": "Request body must be JSON"}), HTTPStatus.BAD_REQUEST
+
+        gpu_budget = data.get("gpu_budget")
+        workflow_name = data.get("workflow")
+
+        if not gpu_budget or not isinstance(gpu_budget, dict):
+            return jsonify({"error": "Missing or invalid 'gpu_budget' field"}), HTTPStatus.BAD_REQUEST
+        if not workflow_name or not isinstance(workflow_name, str):
+            return jsonify({"error": "Missing or invalid 'workflow' field"}), HTTPStatus.BAD_REQUEST
+
+        plan = allocator_bridge.run_allocator(
+            gpu_budget=gpu_budget,
+            workflow_name=workflow_name,
+        )
+        return jsonify(allocator_bridge.deployment_plan_to_json(plan)), HTTPStatus.OK
+
+    except ValueError as ve:
+        return jsonify({"error": str(ve)}), HTTPStatus.BAD_REQUEST
+    except Exception as ex:
+        logging.exception("Error in auto_deploy: %s", ex)
+        return jsonify({"error": str(ex)}), HTTPStatus.INTERNAL_SERVER_ERROR
+
+
+@route("/api/auto_deploy/confirm", methods=["POST"])
+async def api_auto_deploy_confirm() -> QuartReturn:
+    """Execute a deployment plan produced by /api/auto_deploy.
+
+    Expects JSON body:
+        {
+            "specs": [
+                {
+                    "container_name": "gemma",
+                    "cpu": 16,
+                    "memory_gib": 192,
+                    "ephemeral_storage_gib": 64,
+                    "gpu": 2,
+                    "gpu_type": "a100",
+                    "mig_profile": null
+                },
+                ...
+            ]
+        }
+
+    Deploys all containers in the plan.
+    """
+    try:
+        data = await request.get_json()
+        if not data:
+            return jsonify({"error": "Request body must be JSON"}), HTTPStatus.BAD_REQUEST
+
+        specs = data.get("specs")
+        if not specs or not isinstance(specs, list):
+            return jsonify({"error": "Missing or invalid 'specs' field"}), HTTPStatus.BAD_REQUEST
+
+        deployed: List[str] = []
+        errors: List[str] = []
+
+        for spec in specs:
+            container_name = spec.get("container_name")
+            if not container_name:
+                errors.append("Spec missing 'container_name'")
+                continue
+
+            try:
+                await pod_manager.add_pod(
+                    container_name=container_name,
+                    cpu=int(spec.get("cpu", 4)),
+                    memory_gib=int(spec.get("memory_gib", 16)),
+                    ephemeral_storage_gib=int(spec.get("ephemeral_storage_gib", 16)),
+                    gpu=int(spec.get("gpu", 0)),
+                    gpu_type=spec.get("gpu_type"),
+                    mig_profile=spec.get("mig_profile"),
+                    namespace=NAMESPACE,
+                    k8s_cluster=k8s_cluster,
+                )
+                deployed.append(container_name)
+            except Exception as pod_ex:
+                msg = f"Failed to deploy '{container_name}': {pod_ex}"
+                logging.error(msg)
+                errors.append(msg)
+
+        status = HTTPStatus.OK if not errors else HTTPStatus.MULTI_STATUS
+        return jsonify({
+            "deployed": deployed,
+            "errors": errors,
+            "message": f"Deployed {len(deployed)}/{len(specs)} containers.",
+        }), status
+
+    except Exception as ex:
+        logging.exception("Error in auto_deploy/confirm: %s", ex)
+        return jsonify({"error": str(ex)}), HTTPStatus.INTERNAL_SERVER_ERROR
+
+
+@route("/api/auto_deploy/workflows", methods=["GET"])
+async def api_auto_deploy_workflows() -> QuartReturn:
+    """Return available workflows and GPU types for the auto-deploy UI."""
+    return jsonify({
+        "workflows": allocator_bridge.get_available_workflows(),
+        "gpu_types": allocator_bridge.get_available_gpu_types(),
+    }), HTTPStatus.OK
+
+
 @route("/api/node/<node_name>", methods=["DELETE"])
 async def api_remove_node(node_name: str) -> QuartReturn:
     return await node_manager.remove_node(
diff --git a/streamwise/templates/add_pod.html b/streamwise/templates/add_pod.html
index d61952aa..f5496e10 100644
--- a/streamwise/templates/add_pod.html
+++ b/streamwise/templates/add_pod.html
@@ -384,6 +384,94 @@ <h2 class="mt-5">🧩 Applications</h2>
             </form>
         {% endif %}
 
+        <!-- Auto-Deploy Section -->
+        <h2 class="mt-5">🤖 Auto Deploy</h2>
+        <p>Specify your GPU budget and the optimizer will determine the best allocation for each component:</p>
+
+        <form id="auto-deploy-form">
+            <fieldset class="border rounded-3 p-3 mb-4">
+                <legend class="float-none w-auto px-2 fw-semibold">
+                    💰 GPU Budget
+                </legend>
+                <div class="row g-3 mb-3">
+                    <div class="col-md-3">
+                        <label for="auto_gpu_a100" class="form-label">A100</label>
+                        <input type="number" class="form-control" id="auto_gpu_a100" name="gpu_a100"
+                            min="0" max="64" value="8">
+                    </div>
+                    <div class="col-md-3">
+                        <label for="auto_gpu_h100" class="form-label">H100</label>
+                        <input type="number" class="form-control" id="auto_gpu_h100" name="gpu_h100"
+                            min="0" max="64" value="0">
+                    </div>
+                    <div class="col-md-3">
+                        <label for="auto_gpu_h200" class="form-label">H200</label>
+                        <input type="number" class="form-control" id="auto_gpu_h200" name="gpu_h200"
+                            min="0" max="64" value="0">
+                    </div>
+                    <div class="col-md-3">
+                        <label for="auto_gpu_gb200" class="form-label">GB200</label>
+                        <input type="number" class="form-control" id="auto_gpu_gb200" name="gpu_gb200"
+                            min="0" max="64" value="0">
+                    </div>
+                </div>
+            </fieldset>
+
+            <fieldset class="border rounded-3 p-3 mb-4">
+                <legend class="float-none w-auto px-2 fw-semibold">
+                    🎬 Workflow
+                </legend>
+                <div class="mb-3">
+                    <label for="auto_workflow" class="form-label">Application workflow</label>
+                    <select class="form-select" id="auto_workflow" name="workflow">
+                        <option value="streamcast" selected>🎙️ StreamCast (Podcast)</option>
+                        <option value="streampersona">👤 StreamPersona (Slide)</option>
+                        <option value="streamchat">💬 StreamChat (Video Chat)</option>
+                        <option value="streamshort">🎬 StreamShort (Shorts)</option>
+                        <option value="streammovie">🎬 StreamMovie (Movie)</option>
+                        <option value="streamanimate">🎞️ StreamAnimate (Story)</option>
+                        <option value="streamlecture">📚 StreamLecture (Lecture)</option>
+                        <option value="streamdub">🎤 StreamDub (Dubbing)</option>
+                        <option value="streamedit">✂️ StreamEdit (Editing)</option>
+                    </select>
+                </div>
+            </fieldset>
+
+            <div class="text-end mb-3">
+                <button type="submit" class="btn btn-warning" style="width: 200px;"
+                    id="auto-deploy-optimize-btn">
+                    🤖 Optimize
+                </button>
+            </div>
+        </form>
+
+        <!-- Auto-deploy results (hidden until optimize is clicked) -->
+        <div id="auto-deploy-results" style="display:none;">
+            <h4>📊 Optimized Deployment Plan</h4>
+            <div id="auto-deploy-metrics" class="alert alert-success mb-3"></div>
+            <table class="table table-sm table-bordered" id="auto-deploy-plan-table">
+                <thead>
+                    <tr>
+                        <th>Container</th>
+                        <th>GPU</th>
+                        <th>GPU Type</th>
+                        <th>CPU</th>
+                        <th>Memory</th>
+                        <th>MIG</th>
+                    </tr>
+                </thead>
+                <tbody id="auto-deploy-plan-body"></tbody>
+            </table>
+            <div class="text-end">
+                <button type="button" class="btn btn-success" style="width: 200px;"
+                    id="auto-deploy-confirm-btn">
+                    ✅ Confirm Deploy
+                </button>
+            </div>
+        </div>
+
+        <div id="auto-deploy-error" class="alert alert-danger mt-3" style="display:none;"></div>
+
         <script src="{{ url_for('static', filename='js/form-utils.js') }}"></script>
         <script>
             // Keep aligned with deployment/helm/values.yaml and services.json
@@ -685,6 +773,108 @@ <h2 class="mt-5">🧩 Applications</h2>
                     });
                 });
             }
+            // Auto-Deploy
+            const autoDeployForm = document.getElementById('auto-deploy-form');
+            if (autoDeployForm) {
+                let currentPlan = null;
+
+                autoDeployForm.addEventListener('submit', function(e) {
+                    e.preventDefault();
+                    const btn = document.getElementById('auto-deploy-optimize-btn');
+                    btn.disabled = true;
+                    btn.textContent = '⏳ Optimizing...';
+
+                    const gpuBudget = {
+                        'A100': parseInt(document.getElementById('auto_gpu_a100').value) || 0,
+                        'H100': parseInt(document.getElementById('auto_gpu_h100').value) || 0,
+                        'H200': parseInt(document.getElementById('auto_gpu_h200').value) || 0,
+                        'GB200': parseInt(document.getElementById('auto_gpu_gb200').value) || 0,
+                    };
+                    const workflow = document.getElementById('auto_workflow').value;
+
+                    const errorDiv = document.getElementById('auto-deploy-error');
+                    const resultsDiv = document.getElementById('auto-deploy-results');
+                    errorDiv.style.display = 'none';
+                    resultsDiv.style.display = 'none';
+
+                    fetch('/api/auto_deploy', {
+                        method: 'POST',
+                        headers: {'Content-Type': 'application/json'},
+                        body: JSON.stringify({gpu_budget: gpuBudget, workflow: workflow}),
+                        credentials: 'same-origin'
+                    })
+                    .then(response => response.json().then(data => ({ok: response.ok, data})))
+                    .then(({ok, data}) => {
+                        btn.disabled = false;
+                        btn.textContent = '🤖 Optimize';
+                        if (!ok) {
+                            errorDiv.textContent = data.error || 'Unknown error';
+                            errorDiv.style.display = '';
+                            return;
+                        }
+                        currentPlan = data;
+                        // Show metrics
+                        const metrics = data.metrics;
+                        document.getElementById('auto-deploy-metrics').innerHTML =
+                            `<strong>Total Time:</strong> ${metrics.total_time_s}s &nbsp;|&nbsp; ` +
+                            `<strong>TTFF:</strong> ${metrics.ttff_s}s &nbsp;|&nbsp; ` +
+                            `<strong>Cost:</strong> $${metrics.cost} &nbsp;|&nbsp; ` +
+                            `<strong>GPUs Used:</strong> ${JSON.stringify(metrics.gpus_used)}`;
+                        // Show plan table
+                        const tbody = document.getElementById('auto-deploy-plan-body');
+                        tbody.innerHTML = '';
+                        data.specs.forEach(spec => {
+                            const row = document.createElement('tr');
+                            row.innerHTML =
+                                `<td>${escapeHtml(spec.container_name)}</td>` +
+                                `<td>${spec.gpu}</td>` +
+                                `<td>${escapeHtml(spec.gpu_type || 'any')}</td>` +
+                                `<td>${spec.cpu}</td>` +
+                                `<td>${spec.memory_gib} GiB</td>` +
+                                `<td>${spec.mig_profile || '-'}</td>`;
+                            tbody.appendChild(row);
+                        });
+                        resultsDiv.style.display = '';
+                    })
+                    .catch(err => {
+                        btn.disabled = false;
+                        btn.textContent = '🤖 Optimize';
+                        errorDiv.textContent = 'Network error: ' + err;
+                        errorDiv.style.display = '';
+                    });
+                });
+
+                document.getElementById('auto-deploy-confirm-btn').addEventListener('click', function() {
+                    if (!currentPlan || !currentPlan.specs) return;
+
+                    const btn = this;
+                    btn.disabled = true;
+                    btn.textContent = '⏳ Deploying...';
+
+                    fetch('/api/auto_deploy/confirm', {
+                        method: 'POST',
+                        headers: {'Content-Type': 'application/json'},
+                        body: JSON.stringify({specs: currentPlan.specs}),
+                        credentials: 'same-origin'
+                    })
+                    .then(response => response.json().then(data => ({ok: response.ok, data})))
+                    .then(({ok, data}) => {
+                        btn.disabled = false;
+                        btn.textContent = '✅ Confirm Deploy';
+                        if (data.errors && data.errors.length > 0) {
+                            alert('Deployed ' + data.deployed.length + ' containers.\nErrors:\n' + data.errors.join('\n'));
+                        } else {
+                            alert(data.message || 'Deployment complete!');
+                        }
+                        window.location.href = '/';
+                    })
+                    .catch(err => {
+                        btn.disabled = false;
+                        btn.textContent = '✅ Confirm Deploy';
+                        alert('Error: ' + err);
+                    });
+                });
+            }
         </script>
         <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.5/dist/js/bootstrap.bundle.min.js"
             integrity="sha384-k6d4wzSIapyDyv1kpU366/PK5hCdSbCRGRCMv+eplOQJWyd1fbcAu9OCUj5zNLiq"
diff --git a/tests/simulator/test_auto_model_allocator.py b/tests/simulator/test_auto_model_allocator.py
index a9aa17d6..f7550822 100644
--- a/tests/simulator/test_auto_model_allocator.py
+++ b/tests/simulator/test_auto_model_allocator.py
@@ -23,7 +23,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import GPUType
     from sim_types import Model
     from sim_types import QualityLevel
@@ -33,18 +33,18 @@
 
     from data_loading import load_latency_data
 
-    from policies import STREAMWISE_POLICY
-    from policies import NAIVE_POLICY
-    from policies import HEXGEN_POLICY
-    from policies import HELIX_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
+    from model_provisioner.policies import HEXGEN_POLICY
+    from model_provisioner.policies import HELIX_POLICY
 
     from auto_model_allocator import AutoModelAllocator
 
-    from greedy import GreedyAllocator
-    from naive_baseline import NaiveAllocator
-    from hexgen import HexGenAllocator
-    from helix import HelixAllocator
-    from milp import MILPAllocator
+    from model_provisioner.greedy import GreedyAllocator
+    from model_provisioner.naive_baseline import NaiveAllocator
+    from model_provisioner.hexgen import HexGenAllocator
+    from model_provisioner.helix import HelixAllocator
+    from model_provisioner.milp import MILPAllocator
 
     from workflows import PODCAST_WORKFLOW
 
diff --git a/tests/simulator/test_data_loading.py b/tests/simulator/test_data_loading.py
index 129a2f3b..72337375 100644
--- a/tests/simulator/test_data_loading.py
+++ b/tests/simulator/test_data_loading.py
@@ -11,7 +11,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import QualityLevel
 
     from data_loading import load_latency_data
diff --git a/tests/simulator/test_evaluator.py b/tests/simulator/test_evaluator.py
index a162e99b..b3c37e73 100644
--- a/tests/simulator/test_evaluator.py
+++ b/tests/simulator/test_evaluator.py
@@ -8,7 +8,7 @@
 from tests.test_utils import assert_equals_approx
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from constants import SECONDS_IN_HOUR
 
@@ -20,7 +20,7 @@
 
     from evaluator import evaluate_model_allocation
 
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
 
     from models import FluxModelAllocation
     from models import GemmaModelAllocation
diff --git a/tests/simulator/test_greedy.py b/tests/simulator/test_greedy.py
index c33d6991..bfa2996e 100644
--- a/tests/simulator/test_greedy.py
+++ b/tests/simulator/test_greedy.py
@@ -8,7 +8,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from constants import SECONDS_IN_HOUR
 
@@ -21,9 +21,9 @@
     from data_loading import load_latency_data
     from data_loading import load_power_data
 
-    from greedy import GreedyAllocator
+    from model_provisioner.greedy import GreedyAllocator
 
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
 
 
 def test_allocate_8A_8H() -> None:
diff --git a/tests/simulator/test_helix.py b/tests/simulator/test_helix.py
index a336595d..7261b902 100644
--- a/tests/simulator/test_helix.py
+++ b/tests/simulator/test_helix.py
@@ -12,7 +12,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from sim_types import GPUType
     from sim_types import Model
@@ -20,8 +20,8 @@
     from sim_types import Solver
     from data_loading import load_latency_data
     from data_loading import load_power_data
-    from helix import HelixAllocator
-    from policies import HELIX_POLICY
+    from model_provisioner.helix import HelixAllocator
+    from model_provisioner.policies import HELIX_POLICY
 
 
 def test_get_model_order() -> None:
diff --git a/tests/simulator/test_hexgen.py b/tests/simulator/test_hexgen.py
index 99e7eef5..3d77867b 100644
--- a/tests/simulator/test_hexgen.py
+++ b/tests/simulator/test_hexgen.py
@@ -7,12 +7,12 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from sim_types import GPUType
     from data_loading import load_latency_data
-    from hexgen import HexGenAllocator
-    from hexgen import _get_model_order
+    from model_provisioner.hexgen import HexGenAllocator
+    from model_provisioner.hexgen import _get_model_order
     from sim_types import MODEL_ORDER
 
 
@@ -154,7 +154,7 @@ def test_no_gpus_error() -> None:
 
 def test_is_subclass_of_greedy() -> None:
     """HexGenAllocator should extend GreedyAllocator."""
-    from greedy import GreedyAllocator
+    from model_provisioner.greedy import GreedyAllocator
     latency_data = load_latency_data("simulator/data/")
     allocator = HexGenAllocator(
         workflow=DEFAULT_WORKFLOW_CONFIG,
diff --git a/tests/simulator/test_milp.py b/tests/simulator/test_milp.py
index 70c4bfa8..52a308bd 100644
--- a/tests/simulator/test_milp.py
+++ b/tests/simulator/test_milp.py
@@ -13,7 +13,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import LatencyData
     from sim_types import PowerData
     from sim_types import GPUType
@@ -27,11 +27,11 @@
     from constants import DEFAULT_WORKFLOW_CONFIG
     from constants import SECONDS_IN_HOUR
 
-    from policies import STREAMWISE_MILP_POLICY
+    from model_provisioner.policies import STREAMWISE_MILP_POLICY
 
     from workflows import WORKFLOWS
 
-    from milp import MILPAllocator
+    from model_provisioner.milp import MILPAllocator
 
     from evaluator import evaluate_model_allocation
 
diff --git a/tests/simulator/test_models.py b/tests/simulator/test_models.py
index 57e00a0a..eccb449b 100644
--- a/tests/simulator/test_models.py
+++ b/tests/simulator/test_models.py
@@ -16,7 +16,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import GPUType
     from sim_types import Model
     from sim_types import ModelAllocation
@@ -29,8 +29,8 @@
     from data_loading import load_latency_data
     from data_loading import load_power_data
 
-    from policies import STREAMWISE_POLICY
-    from policies import NAIVE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
 
     from models import get_model_allocation
     from models import _calculate_total_time
diff --git a/tests/simulator/test_multirequests_derive.py b/tests/simulator/test_multirequests_derive.py
index 8e7ed798..c809ccd0 100644
--- a/tests/simulator/test_multirequests_derive.py
+++ b/tests/simulator/test_multirequests_derive.py
@@ -7,7 +7,7 @@
 from tests.test_utils import assert_equal_dict
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import GPUType
     from sim_types import Model
     from sim_types import QualityLevel
diff --git a/tests/simulator/test_simulator.py b/tests/simulator/test_simulator.py
index fc791151..d698bb9d 100644
--- a/tests/simulator/test_simulator.py
+++ b/tests/simulator/test_simulator.py
@@ -13,7 +13,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import WorkflowConfig
     from sim_types import Model
     from sim_types import Objective
@@ -26,10 +26,10 @@
     from data_loading import load_power_data
 
     from auto_model_allocator import AutoModelAllocator
-    from greedy import GreedyAllocator
+    from model_provisioner.greedy import GreedyAllocator
 
-    from policies import STREAMWISE_POLICY
-    from policies import NAIVE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
 
 
 def test_estimate_total_time() -> None:
diff --git a/tests/simulator/test_simulator_actions.py b/tests/simulator/test_simulator_actions.py
index dd3bf4fd..539946c5 100644
--- a/tests/simulator/test_simulator_actions.py
+++ b/tests/simulator/test_simulator_actions.py
@@ -7,7 +7,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import Action
     from sim_types import ActionName
     from sim_types import GPUType
diff --git a/tests/simulator/test_simulator_baseline.py b/tests/simulator/test_simulator_baseline.py
index 64282777..b195a1cf 100644
--- a/tests/simulator/test_simulator_baseline.py
+++ b/tests/simulator/test_simulator_baseline.py
@@ -11,7 +11,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import GPUType
     from sim_types import Model
 
@@ -24,12 +24,12 @@
     from data_loading import load_power_data
 
     from auto_model_allocator import AutoModelAllocator
-    from naive_baseline import NaiveAllocator
-    from greedy import GreedyAllocator
+    from model_provisioner.naive_baseline import NaiveAllocator
+    from model_provisioner.greedy import GreedyAllocator
 
-    from policies import NAIVE_POLICY
-    from policies import BASELINE_POLICIES
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
+    from model_provisioner.policies import BASELINE_POLICIES
+    from model_provisioner.policies import STREAMWISE_POLICY
 
     from workflows import SHORTS_WORKFLOW
     from workflows import WORKFLOWS
diff --git a/tests/simulator/test_simulator_energy.py b/tests/simulator/test_simulator_energy.py
index 16b6e8bf..c96fd128 100644
--- a/tests/simulator/test_simulator_energy.py
+++ b/tests/simulator/test_simulator_energy.py
@@ -9,7 +9,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
 
     from sim_types import GPUType
@@ -21,11 +21,11 @@
     from data_loading import load_power_data
 
     from auto_model_allocator import AutoModelAllocator
-    from greedy import GreedyAllocator
-    from naive_baseline import NaiveAllocator
+    from model_provisioner.greedy import GreedyAllocator
+    from model_provisioner.naive_baseline import NaiveAllocator
 
-    from policies import NAIVE_POLICY
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
 
 
 def test_energy() -> None:
diff --git a/tests/simulator/test_simulator_multirequests.py b/tests/simulator/test_simulator_multirequests.py
index 972596ec..6403baba 100644
--- a/tests/simulator/test_simulator_multirequests.py
+++ b/tests/simulator/test_simulator_multirequests.py
@@ -7,7 +7,7 @@
 from tests.test_utils import assert_equals_approx
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from multirequests import QPM_LIST
     from multirequests import get_replicas
     from multirequests import get_costs
diff --git a/tests/simulator/test_simulator_plotutils.py b/tests/simulator/test_simulator_plotutils.py
index cee69368..b3bdead9 100644
--- a/tests/simulator/test_simulator_plotutils.py
+++ b/tests/simulator/test_simulator_plotutils.py
@@ -6,7 +6,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from plot_utils import plot_ttff_vs_cost
     from plot_utils import plot_ttff_vs_energy
     from plot_utils import plot_adaptive_quality
diff --git a/tests/simulator/test_simulator_policies.py b/tests/simulator/test_simulator_policies.py
index ffab5ba0..d9e1421f 100644
--- a/tests/simulator/test_simulator_policies.py
+++ b/tests/simulator/test_simulator_policies.py
@@ -11,9 +11,9 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
-    from policies import STREAMWISE_POLICY
-    from policies import BASELINE_POLICIES
+with temp_sys_path("simulator", "streamwise"):
+    from model_provisioner.policies import STREAMWISE_POLICY
+    from model_provisioner.policies import BASELINE_POLICIES
 
     from sim_types import Objective
 
diff --git a/tests/simulator/test_simulator_provisioning.py b/tests/simulator/test_simulator_provisioning.py
index 6bd142ae..fb5d46fd 100644
--- a/tests/simulator/test_simulator_provisioning.py
+++ b/tests/simulator/test_simulator_provisioning.py
@@ -7,7 +7,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
 
     from provisioning import get_provisioning_results
@@ -23,9 +23,9 @@
 
     from data_loading import load_latency_data
 
-    from policies import NAIVE_POLICY
-    from policies import STREAMWISE_POLICY
-    from policies import HEXGEN_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
+    from model_provisioner.policies import HEXGEN_POLICY
 
 
 @pytest.mark.parametrize("gpu_type", [gpu_type for gpu_type in GPUType])
diff --git a/tests/simulator/test_simulator_types.py b/tests/simulator/test_simulator_types.py
index 8bfc292f..223a3260 100644
--- a/tests/simulator/test_simulator_types.py
+++ b/tests/simulator/test_simulator_types.py
@@ -8,7 +8,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import Model
     from sim_types import GPUType
 
@@ -20,7 +20,7 @@
     from models import GemmaModelAllocation
     from models import FluxModelAllocation
 
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
 
     from workflows import PODCAST_WORKFLOW
 
diff --git a/tests/simulator/test_simulator_utils.py b/tests/simulator/test_simulator_utils.py
index 9711a696..b78d675d 100644
--- a/tests/simulator/test_simulator_utils.py
+++ b/tests/simulator/test_simulator_utils.py
@@ -6,7 +6,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import Model
     from sim_types import GPUType
     from sim_types import ModelAllocation
diff --git a/tests/simulator/test_workflows.py b/tests/simulator/test_workflows.py
index bff7ed56..b38dc2ab 100644
--- a/tests/simulator/test_workflows.py
+++ b/tests/simulator/test_workflows.py
@@ -15,7 +15,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import WorkflowConfig, Model, QualityLevel, GPUType
     from constants import (
         FPS,
@@ -28,7 +28,7 @@
     )
     from data_loading import load_latency_data
     from auto_model_allocator import AutoModelAllocator
-    from policies import STREAMWISE_POLICY, NAIVE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY, NAIVE_POLICY
     from workflows import (
         MAX_FT_FRAMES,
         SUBSCENE_SECONDS,
diff --git a/tests/streamwise/test_allocator_bridge.py b/tests/streamwise/test_allocator_bridge.py
new file mode 100644
index 00000000..569e4073
--- /dev/null
+++ b/tests/streamwise/test_allocator_bridge.py
@@ -0,0 +1,282 @@
+"""
+Tests for streamwise/allocator_bridge.py.
+
+Covers:
+- Model-to-container name mapping.
+- Result to deployment specs conversion.
+- run_allocator end-to-end (with real latency data).
+- Error handling for invalid inputs.
+"""
+
+from __future__ import annotations
+
+import sys
+import os
+
+import pytest
+
+# Add current path and simulator/ permanently so lazy imports
+# (e.g. GreedyAllocator via auto_model_allocator) resolve at test time.
+sys.path.append(os.getcwd())
+sys.path[:0] = [os.path.join(os.getcwd(), "simulator")]
+
+from tests.test_utils import temp_sys_path
+
+with temp_sys_path("streamwise", "simulator"):
+    from allocator_bridge import (
+        MODEL_TO_CONTAINERS,
+        CONTAINER_RESOURCES,
+        GPU_TYPE_TO_POD_STR,
+        APP_TO_WORKFLOW,
+        DeploymentSpec,
+        DeploymentPlan,
+        get_available_workflows,
+        get_available_gpu_types,
+        result_to_deployment_specs,
+        deployment_plan_to_json,
+        run_allocator,
+    )
+    from sim_types import GPUType, Model, Result
+    from models import (
+        GemmaModelAllocation,
+        FluxModelAllocation,
+        HFModelAllocation,
+        HFVAEModelAllocation,
+        FTModelAllocation,
+        OthersModelAllocation,
+        UpscalerModelAllocation,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Mapping correctness
+# ---------------------------------------------------------------------------
+
+def test_model_to_containers_covers_all_models() -> None:
+    """Every Model enum value must have a mapping entry."""
+    for model in Model:
+        assert model in MODEL_TO_CONTAINERS, f"Missing mapping for {model}"
+
+
+def test_container_resources_covers_all_mapped_containers() -> None:
+    """Every container referenced in MODEL_TO_CONTAINERS must have resource defaults."""
+    for model, containers in MODEL_TO_CONTAINERS.items():
+        for container in containers:
+            assert container in CONTAINER_RESOURCES, (
+                f"Missing CONTAINER_RESOURCES for '{container}' (from {model})")
+
+
+def test_gpu_type_to_pod_str_covers_all_gpu_types() -> None:
+    """Every GPUType enum value must have a pod string mapping."""
+    for gpu_type in GPUType:
+        assert gpu_type in GPU_TYPE_TO_POD_STR
+
+
+def test_app_to_workflow_has_expected_entries() -> None:
+    """Key StreamWise apps should map to workflows."""
+    assert "streamcast" in APP_TO_WORKFLOW
+    assert "streampersona" in APP_TO_WORKFLOW
+    assert "streamchat" in APP_TO_WORKFLOW
+
+
+# ---------------------------------------------------------------------------
+# Utility functions
+# ---------------------------------------------------------------------------
+
+def test_get_available_workflows() -> None:
+    workflows = get_available_workflows()
+    assert isinstance(workflows, list)
+    assert "streamcast" in workflows
+    assert len(workflows) >= 5
+
+
+def test_get_available_gpu_types() -> None:
+    gpu_types = get_available_gpu_types()
+    assert isinstance(gpu_types, list)
+    assert "A100" in gpu_types
+    assert "H100" in gpu_types
+
+
+# ---------------------------------------------------------------------------
+# result_to_deployment_specs
+# ---------------------------------------------------------------------------
+
+def test_result_to_deployment_specs_basic() -> None:
+    """A simple result with one active allocation maps to the right container."""
+    models = {
+        GPUType.A100: {
+            Model.GEMMA: [GemmaModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=1)],
+            Model.FLUX: [FluxModelAllocation(gpu_type=GPUType.A100, devices=2, replicas=1)],
+            Model.HF: [HFModelAllocation(gpu_type=GPUType.A100, devices=2, replicas=2)],
+            Model.HF_VAE: [HFVAEModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=1)],
+            Model.FT: [FTModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=0)],
+            Model.FT_VAE: [],
+            Model.UPSCALER: [UpscalerModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=0)],
+            Model.OTHERS: [OthersModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=1)],
+        }
+    }
+    result = Result(
+        total_time_s=100.0,
+        ttff_s=10.0,
+        cost=1.0,
+        gpus_used={GPUType.A100: 8},
+        gpus_total={GPUType.A100: 8},
+        models=models,
+    )
+
+    specs = result_to_deployment_specs(result)
+    assert isinstance(specs, list)
+    assert len(specs) > 0
+
+    container_names = [s.container_name for s in specs]
+    assert "gemma" in container_names
+    assert "flux" in container_names
+    assert "hunyuanframepackf1" in container_names  # HF model
+    assert "hunyuanframepackvae" in container_names  # HF_VAE model
+
+    # OTHERS maps to kokoro + yolo
+    assert "kokoro" in container_names
+    assert "yolo" in container_names
+
+    # Check GPU type mapping
+    gemma_spec = next(s for s in specs if s.container_name == "gemma")
+    assert gemma_spec.gpu_type == "a100"
+    assert gemma_spec.gpu == 1
+
+    # MIG containers get mig_profile set
+    kokoro_spec = next(s for s in specs if s.container_name == "kokoro")
+    assert kokoro_spec.mig_profile == "1g.10gb"
+
+
+def test_result_to_deployment_specs_skips_zero_replicas() -> None:
+    """Allocations with zero replicas should not produce deployment specs."""
+    models = {
+        GPUType.A100: {
+            Model.GEMMA: [GemmaModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=0)],
+            Model.FLUX: [FluxModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=0)],
+            Model.HF: [HFModelAllocation(gpu_type=GPUType.A100, devices=1, replicas=0)],
+            Model.HF_VAE: [],
+            Model.FT: [],
+            Model.FT_VAE: [],
+            Model.UPSCALER: [],
+            Model.OTHERS: [],
+        }
+    }
+    result = Result(
+        total_time_s=0.0,
+        ttff_s=0.0,
+        cost=0.0,
+        gpus_used={GPUType.A100: 0},
+        gpus_total={GPUType.A100: 8},
+        models=models,
+    )
+    specs = result_to_deployment_specs(result)
+    assert specs == []
+
+
+def test_result_to_deployment_specs_multiple_replicas() -> None:
+    """Multiple replicas should produce multiple deployment specs for same container."""
+    models = {
+        GPUType.H100: {
+            Model.GEMMA: [GemmaModelAllocation(gpu_type=GPUType.H100, devices=1, replicas=1)],
+            Model.FLUX: [FluxModelAllocation(gpu_type=GPUType.H100, devices=1, replicas=1)],
+            Model.HF: [HFModelAllocation(gpu_type=GPUType.H100, devices=2, replicas=3)],
+            Model.HF_VAE: [],
+            Model.FT: [],
+            Model.FT_VAE: [],
+            Model.UPSCALER: [],
+            Model.OTHERS: [],
+        }
+    }
+    result = Result(
+        total_time_s=50.0,
+        ttff_s=5.0,
+        cost=0.5,
+        gpus_used={GPUType.H100: 8},
+        gpus_total={GPUType.H100: 16},
+        models=models,
+    )
+    specs = result_to_deployment_specs(result)
+    hf_specs = [s for s in specs if s.container_name == "hunyuanframepackf1"]
+    assert len(hf_specs) == 3  # 3 replicas
+    for spec in hf_specs:
+        assert spec.gpu == 2
+        assert spec.gpu_type == "h100"
+
+
+# ---------------------------------------------------------------------------
+# deployment_plan_to_json
+# ---------------------------------------------------------------------------
+
+def test_deployment_plan_to_json() -> None:
+    """Serialization should produce all expected keys."""
+    result = Result(
+        total_time_s=100.0,
+        ttff_s=10.0,
+        cost=1.5,
+        gpus_used={GPUType.A100: 8},
+        gpus_total={GPUType.A100: 8},
+        models={},
+    )
+    plan = DeploymentPlan(
+        specs=[
+            DeploymentSpec(
+                container_name="gemma", cpu=16, memory_gib=192,
+                ephemeral_storage_gib=64, gpu=2, gpu_type="a100", mig_profile=None)
+        ],
+        result=result,
+        workflow_name="streamcast",
+        gpu_budget={"A100": 8},
+    )
+    data = deployment_plan_to_json(plan)
+    assert data["workflow_name"] == "streamcast"
+    assert data["gpu_budget"] == {"A100": 8}
+    assert data["metrics"]["total_time_s"] == 100.0
+    assert data["metrics"]["ttff_s"] == 10.0
+    assert len(data["specs"]) == 1
+    assert data["specs"][0]["container_name"] == "gemma"
+
+
+# ---------------------------------------------------------------------------
+# run_allocator (integration with real data)
+# ---------------------------------------------------------------------------
+
+def test_run_allocator_streamcast_8_a100() -> None:
+    """Run allocator for StreamCast with 8 A100s — should produce a valid plan."""
+    plan = run_allocator(
+        gpu_budget={"A100": 8},
+        workflow_name="streamcast",
+    )
+    assert isinstance(plan, DeploymentPlan)
+    assert len(plan.specs) > 0
+    assert plan.result.total_time_s > 0
+    assert plan.result.ttff_s > 0
+    assert plan.workflow_name == "streamcast"
+
+
+def test_run_allocator_streamchat_8_h100() -> None:
+    """Run allocator for StreamChat with 8 H100s."""
+    plan = run_allocator(
+        gpu_budget={"H100": 8},
+        workflow_name="streamchat",
+    )
+    assert isinstance(plan, DeploymentPlan)
+    assert len(plan.specs) > 0
+
+
+def test_run_allocator_invalid_workflow() -> None:
+    """Unknown workflow name raises ValueError."""
+    with pytest.raises(ValueError, match="Unknown workflow"):
+        run_allocator(gpu_budget={"A100": 8}, workflow_name="nonexistent")
+
+
+def test_run_allocator_invalid_gpu_type() -> None:
+    """Unknown GPU type raises ValueError."""
+    with pytest.raises(ValueError, match="Unknown GPU type"):
+        run_allocator(gpu_budget={"RTX4090": 8}, workflow_name="streamcast")
+
+
+def test_run_allocator_insufficient_gpus() -> None:
+    """Too few GPUs raises ValueError."""
+    with pytest.raises(ValueError, match="at least 8"):
+        run_allocator(gpu_budget={"A100": 4}, workflow_name="streamcast")
diff --git a/tests/streamwise/test_streamwise_auto_deploy.py b/tests/streamwise/test_streamwise_auto_deploy.py
new file mode 100644
index 00000000..a191785a
--- /dev/null
+++ b/tests/streamwise/test_streamwise_auto_deploy.py
@@ -0,0 +1,226 @@
+"""
+Tests for the auto-deploy API endpoints in streamwise.py.
+
+Covers:
+- POST /api/auto_deploy — returns optimized plan.
+- POST /api/auto_deploy/confirm — deploys the plan.
+- GET /api/auto_deploy/workflows — lists available options.
+- Error cases (missing fields, invalid inputs).
+"""
+
+from __future__ import annotations
+
+import sys
+
+import pytest
+
+from http import HTTPStatus
+from unittest.mock import patch
+
+from tests.test_utils import temp_sys_path
+from tests.k8s_mock import K8sMock
+
+mock_k8s = K8sMock()
+
+mock_modules = {}
+mock_modules.update(mock_k8s.get_sub_modules())
+
+import streamwise.http_session_manager  # noqa: F401 — registers the streamwise package
+
+# Permanently inject K8s mocks into sys.modules (not via context manager)
+# so that simulator modules loaded alongside streamwise remain importable
+# after setup completes.
+_original_modules = {}
+for mod_name, mock_mod in mock_modules.items():
+    _original_modules[mod_name] = sys.modules.get(mod_name)
+    sys.modules[mod_name] = mock_mod
+
+with temp_sys_path("streamwise"):
+    from streamwise import streamwise as sw
+
+
+def _get_client():  # type: ignore[no-untyped-def]
+    app = sw.app
+    return app.test_client()
+
+
+@pytest.fixture(scope="function", autouse=True)
+def setup_k8s_cluster() -> None:
+    sw.k8s_cluster = "unittest"
+    sw.use_https = False
+
+
+# ---------------------------------------------------------------------------
+# GET /api/auto_deploy/workflows
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_auto_deploy_workflows() -> None:
+    """Should return available workflows and GPU types."""
+    client = _get_client()
+    response = await client.get("/api/auto_deploy/workflows")
+    assert response.status_code == HTTPStatus.OK
+    data = await response.get_json()
+    assert "workflows" in data
+    assert "gpu_types" in data
+    assert "streamcast" in data["workflows"]
+    assert "A100" in data["gpu_types"]
+
+
+# ---------------------------------------------------------------------------
+# POST /api/auto_deploy
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_auto_deploy_success() -> None:
+    """Valid request returns an optimized deployment plan."""
+    fake_json = {
+        "workflow_name": "streamcast",
+        "gpu_budget": {"A100": 8},
+        "metrics": {"total_time_s": 3.5, "ttff_s": 1.0, "cost": 12.0, "gpus_used": {"A100": 3}},
+        "specs": [
+            {"container_name": "gemma", "cpu": 4, "memory_gib": 16,
+             "ephemeral_storage_gib": 10, "gpu": 1, "gpu_type": "A100", "mig_profile": None},
+            {"container_name": "flux", "cpu": 4, "memory_gib": 16,
+             "ephemeral_storage_gib": 10, "gpu": 2, "gpu_type": "A100", "mig_profile": None},
+        ],
+    }
+    # Patch on the actual module object that streamwise.py holds a reference to.
+    with patch.object(sw.allocator_bridge, "run_allocator") as mock_alloc, \
+         patch.object(sw.allocator_bridge, "deployment_plan_to_json", return_value=fake_json):
+        mock_alloc.return_value = "fake_plan"
+        client = _get_client()
+        response = await client.post(
+            "/api/auto_deploy",
+            json={
+                "gpu_budget": {"A100": 8},
+                "workflow": "streamcast",
+            },
+        )
+    assert response.status_code == HTTPStatus.OK
+    data = await response.get_json()
+    assert "specs" in data
+    assert "metrics" in data
+    assert len(data["specs"]) == 2
+    assert data["metrics"]["total_time_s"] == 3.5
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_missing_gpu_budget() -> None:
+    """Missing gpu_budget field returns 400."""
+    client = _get_client()
+    response = await client.post(
+        "/api/auto_deploy",
+        json={"workflow": "streamcast"},
+    )
+    assert response.status_code == HTTPStatus.BAD_REQUEST
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_missing_workflow() -> None:
+    """Missing workflow field returns 400."""
+    client = _get_client()
+    response = await client.post(
+        "/api/auto_deploy",
+        json={"gpu_budget": {"A100": 8}},
+    )
+    assert response.status_code == HTTPStatus.BAD_REQUEST
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_invalid_workflow() -> None:
+    """Invalid workflow name returns 400."""
+    client = _get_client()
+    response = await client.post(
+        "/api/auto_deploy",
+        json={
+            "gpu_budget": {"A100": 8},
+            "workflow": "nonexistent",
+        },
+    )
+    assert response.status_code == HTTPStatus.BAD_REQUEST
+    data = await response.get_json()
+    assert "error" in data
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_insufficient_gpus() -> None:
+    """Too few GPUs returns 400."""
+    client = _get_client()
+    response = await client.post(
+        "/api/auto_deploy",
+        json={
+            "gpu_budget": {"A100": 2},
+            "workflow": "streamcast",
+        },
+    )
+    assert response.status_code == HTTPStatus.BAD_REQUEST
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_no_json_body() -> None:
+    """No JSON body returns 400."""
+    client = _get_client()
+    response = await client.post("/api/auto_deploy")
+    assert response.status_code == HTTPStatus.BAD_REQUEST
+
+
+# ---------------------------------------------------------------------------
+# POST /api/auto_deploy/confirm
+# ---------------------------------------------------------------------------
+
+@pytest.mark.asyncio
+async def test_auto_deploy_confirm_success() -> None:
+    """Valid confirm request deploys containers."""
+    client = _get_client()
+    specs = [
+        {
+            "container_name": "gemma",
+            "cpu": 16,
+            "memory_gib": 192,
+            "ephemeral_storage_gib": 64,
+            "gpu": 2,
+            "gpu_type": "a100",
+            "mig_profile": None,
+        },
+        {
+            "container_name": "flux",
+            "cpu": 12,
+            "memory_gib": 128,
+            "ephemeral_storage_gib": 64,
+            "gpu": 2,
+            "gpu_type": "a100",
+            "mig_profile": None,
+        },
+    ]
+    response = await client.post(
+        "/api/auto_deploy/confirm",
+        json={"specs": specs},
+    )
+    # Should succeed (mocked K8s)
+    assert response.status_code in (HTTPStatus.OK, HTTPStatus.MULTI_STATUS)
+    data = await response.get_json()
+    assert "deployed" in data
+    assert "message" in data
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_confirm_missing_specs() -> None:
+    """Missing specs returns 400."""
+    client = _get_client()
+    response = await client.post(
+        "/api/auto_deploy/confirm",
+        json={},
+    )
+    assert response.status_code == HTTPStatus.BAD_REQUEST
+
+
+@pytest.mark.asyncio
+async def test_auto_deploy_confirm_empty_specs() -> None:
+    """Empty specs list returns 400."""
+    client = _get_client()
+    response = await client.post(
+        "/api/auto_deploy/confirm",
+        json={"specs": []},
+    )
+    assert response.status_code == HTTPStatus.BAD_REQUEST
diff --git a/wrapper/run_httpserver.py b/wrapper/run_httpserver.py
index a9ec16ad..6ca398fe 100644
--- a/wrapper/run_httpserver.py
+++ b/wrapper/run_httpserver.py
@@ -1266,8 +1266,8 @@ async def send_task(gen_task: dict) -> None:
 
     try:
         payload_bytes = await asyncio.to_thread(pickle.dumps, gen_task)
-        payload_bytes = bytearray(payload_bytes)
-        payload_tensor = torch.frombuffer(payload_bytes, dtype=torch.uint8).to("cuda")
+        payload_buffer = bytearray(payload_bytes)
+        payload_tensor = torch.frombuffer(payload_buffer, dtype=torch.uint8).to("cuda")
         payload_size = torch.tensor([payload_tensor.numel()], dtype=torch.int64, device="cuda")
 
         if payload_size.item() > MAX_PAYLOAD_BYTES: