Azure · James-QiuHaoran · May 16, 2026 · May 16, 2026 · May 17, 2026
diff --git a/.flake8 b/.flake8
@@ -3,3 +3,4 @@ max-line-length = 120
 # Ignore E402: module-level import not at top of file
 # Ignore W503: line break before binary operator (incompatible with W504)
 ignore = E402,W503
+exclude = .venv
diff --git a/.gitignore b/.gitignore
@@ -11,6 +11,9 @@
 *.sln.docstates
 *.env
 
+# Environment files
+.venv/
+
 # User-specific files (MonoDevelop/Xamarin Studio)
 *.userprefs
 

diff --git a/simulator/__init__.py b/simulator/__init__.py
@@ -0,0 +1,15 @@
+"""
+Simulator package: provisioning sweeps, multi-request analysis, and plotting
+built on the allocation policies from model_provisioner.
+
+Those policy implementations are located in ``streamwise/model_provisioner/``.
+"""
+import os
+import sys
+
+# Put the sibling streamwise/ directory on sys.path so that simulator modules
+# can ``import model_provisioner``.
+_PACKAGE_DIR = os.path.dirname(os.path.abspath(__file__))
+_STREAMWISE_DIR = os.path.normpath(os.path.join(_PACKAGE_DIR, "..", "streamwise"))
+if _STREAMWISE_DIR not in sys.path:
+    sys.path.insert(0, _STREAMWISE_DIR)
diff --git a/simulator/actions.py b/simulator/actions.py
@@ -27,7 +27,7 @@
 from sim_types import Objective
 from sim_types import Policy
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from models import get_model_allocation
 

diff --git a/simulator/auto_model_allocator.py b/simulator/auto_model_allocator.py
@@ -19,7 +19,7 @@
 from sim_types import GPUType
 from sim_types import Result
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from model_allocator import ModelAllocator
 
@@ -47,39 +47,39 @@ def __init__(
     def _build_allocator(self) -> ModelAllocator:
         """Create concrete allocator based on configured solver."""
         if self.policy.solver == Solver.GREEDY:
-            from greedy import GreedyAllocator
+            from model_provisioner.greedy import GreedyAllocator
             return GreedyAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
                 power_data=self.power_data,
                 policy=self.policy,
             )
         if self.policy.solver == Solver.NAIVE:
-            from naive_baseline import NaiveAllocator
+            from model_provisioner.naive_baseline import NaiveAllocator
             return NaiveAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
                 power_data=self.power_data,
                 policy=self.policy,
             )
         if self.policy.solver in {Solver.GUROBI, Solver.HIGHS}:
-            from milp import MILPAllocator
+            from model_provisioner.milp import MILPAllocator
             return MILPAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
                 power_data=self.power_data,
                 policy=self.policy,
             )
         if self.policy.solver == Solver.HEXGEN:
-            from hexgen import HexGenAllocator
+            from model_provisioner.hexgen import HexGenAllocator
             return HexGenAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,
                 power_data=self.power_data,
                 policy=self.policy,
             )
         if self.policy.solver == Solver.HELIX:
-            from helix import HelixAllocator
+            from model_provisioner.helix import HelixAllocator
             return HelixAllocator(
                 workflow=self.workflow,
                 latency_data=self.latency_data,

diff --git a/simulator/data_loading.py b/simulator/data_loading.py
@@ -28,15 +28,17 @@
 from constants import POWER_GPU_IDLE
 from constants import POWER_GPU_TDP
 
+_DEFAULT_DATA_DIR = Path("data")
+
 
 def load_latency_data(
-    data_dir: str = "data/",
+    data_dir: str | Path = _DEFAULT_DATA_DIR,
 ) -> LatencyData:
     """
     Load latency and throughput mapping data from CSV files.
 
     Args:
-        data_dir (str): The directory where the CSV files are stored.
+        data_dir: The directory where the CSV files are stored.
     Returns:
         LatencyData: An object containing all loaded latency data.
     """
@@ -107,13 +109,13 @@ def load_latency_data(
 
 
 def load_power_data(
-    data_dir: str = "data/"
+    data_dir: str | Path = _DEFAULT_DATA_DIR
 ) -> PowerData:
     """
     Load power consumption data from CSV files.
 
     Args:
-        data_dir (str): The directory where the CSV files are stored.
+        data_dir: The directory where the CSV files are stored.
     Returns:
         PowerData: An object containing all loaded power consumption data.
     """
@@ -216,7 +218,7 @@ def load_power_data(
 
 
 def load_adaptive_quality_data(
-    data_dir: str,
+    data_dir: str | Path,
     level: QualityLevel,
 ) -> LatencyData:
     """Load latency data for adaptive quality."""

diff --git a/simulator/model_allocator.py b/simulator/model_allocator.py
@@ -27,7 +27,7 @@
 from models import UpscalerModelAllocation
 from models import OthersModelAllocation
 
-from policies import NAIVE_POLICY
+from model_provisioner.policies import NAIVE_POLICY
 
 
 class ModelAllocator(ABC):

diff --git a/simulator/multirequests.py b/simulator/multirequests.py
@@ -18,7 +18,7 @@
 
 from workflows import PODCAST_WORKFLOW
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from auto_model_allocator import AutoModelAllocator
 

diff --git a/simulator/provisioning.py b/simulator/provisioning.py
@@ -33,7 +33,7 @@
 
 from auto_model_allocator import AutoModelAllocator
 
-from policies import STREAMWISE_POLICY
+from model_provisioner.policies import STREAMWISE_POLICY
 
 from constants import SECONDS_IN_HOUR
 

diff --git a/streamwise/model_provisioner/__init__.py b/streamwise/model_provisioner/__init__.py
@@ -0,0 +1,20 @@
+"""
+Model Provisioner — allocation policy implementations for GPU resource distribution.
+
+Contains greedy, naive, MILP, HexGen, and Helix allocation strategies.
+The foundation types (sim_types, constants, models, etc.) live in simulator/.
+"""
+import os
+import sys
+
+# Add simulator/ to sys.path so policy files can import foundation modules.
+# Supports both local dev layout (../../simulator) and Docker layout (../simulator).
+# The first candidate that exists wins, even when it is already on sys.path.
+_HERE = os.path.dirname(os.path.abspath(__file__))
+_CANDIDATES = [
+    os.path.normpath(os.path.join(_HERE, "..", "..", "simulator")),
+    os.path.normpath(os.path.join(_HERE, "..", "simulator")),
+]
+_SIMULATOR_DIR = next((_p for _p in _CANDIDATES if os.path.isdir(_p)), None)
+if _SIMULATOR_DIR is not None and _SIMULATOR_DIR not in sys.path:
+    sys.path.insert(0, _SIMULATOR_DIR)
diff --git a/simulator/greedy.py → streamwise/model_provisioner/greedy.py b/simulator/greedy.py → streamwise/model_provisioner/greedy.py
@@ -33,9 +33,9 @@
 
 from model_allocator import ModelAllocator
 
-from policies import STREAMWISE_POLICY
-from policies import MAX_ITERATIONS
-from policies import USE_ALL_GPUS
+from .policies import STREAMWISE_POLICY
+from .policies import MAX_ITERATIONS
+from .policies import USE_ALL_GPUS
 
 from actions import gen_actions
 from actions import choose_action

diff --git a/simulator/helix.py → streamwise/model_provisioner/helix.py b/simulator/helix.py → streamwise/model_provisioner/helix.py
@@ -43,10 +43,10 @@
 
 from evaluator import evaluate_model_allocation
 
-from milp import MILPAllocator
+from .milp import MILPAllocator
 
-from policies import HELIX_POLICY
-from policies import MAX_DEVICES
+from .policies import HELIX_POLICY
+from .policies import MAX_DEVICES
 
 from constants import DEVICE_OPTIONS
 

diff --git a/simulator/hexgen.py → streamwise/model_provisioner/hexgen.py b/simulator/hexgen.py → streamwise/model_provisioner/hexgen.py
@@ -30,15 +30,15 @@
 from evaluator import calc_used_gpus
 from evaluator import evaluate_model_allocation
 
-from greedy import GreedyAllocator
+from .greedy import GreedyAllocator
 
 from actions import gen_actions
 from actions import choose_action
 from actions import apply_action
 
-from policies import HEXGEN_POLICY
-from policies import MAX_ITERATIONS
-from policies import USE_ALL_GPUS
+from .policies import HEXGEN_POLICY
+from .policies import MAX_ITERATIONS
+from .policies import USE_ALL_GPUS
 
 
 def _get_model_order(workflow: WorkflowConfig) -> list[Model]:

diff --git a/simulator/milp.py → streamwise/model_provisioner/milp.py b/simulator/milp.py → streamwise/model_provisioner/milp.py
@@ -40,7 +40,7 @@
 from constants import NUM_GPUS_PER_SERVER
 from constants import SECONDS_IN_HOUR
 
-from policies import STREAMWISE_MILP_POLICY
+from .policies import STREAMWISE_MILP_POLICY
 
 
 MAX_INSTANCES = 16

diff --git a/simulator/naive_baseline.py → ...mwise/model_provisioner/naive_baseline.py b/simulator/naive_baseline.py → ...mwise/model_provisioner/naive_baseline.py
@@ -31,8 +31,8 @@
 
 from evaluator import evaluate_model_allocation
 
-from policies import NAIVE_POLICY
-from policies import MAX_DEVICES
+from .policies import NAIVE_POLICY
+from .policies import MAX_DEVICES
 
 from model_allocator import ModelAllocator
 

diff --git a/simulator/policies.py → streamwise/model_provisioner/policies.py b/simulator/policies.py → streamwise/model_provisioner/policies.py
diff --git a/tests/simulator/test_auto_model_allocator.py b/tests/simulator/test_auto_model_allocator.py
@@ -23,7 +23,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import GPUType
     from sim_types import Model
     from sim_types import QualityLevel
@@ -33,18 +33,18 @@
 
     from data_loading import load_latency_data
 
-    from policies import STREAMWISE_POLICY
-    from policies import NAIVE_POLICY
-    from policies import HEXGEN_POLICY
-    from policies import HELIX_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
+    from model_provisioner.policies import NAIVE_POLICY
+    from model_provisioner.policies import HEXGEN_POLICY
+    from model_provisioner.policies import HELIX_POLICY
 
     from auto_model_allocator import AutoModelAllocator
 
-    from greedy import GreedyAllocator
-    from naive_baseline import NaiveAllocator
-    from hexgen import HexGenAllocator
-    from helix import HelixAllocator
-    from milp import MILPAllocator
+    from model_provisioner.greedy import GreedyAllocator
+    from model_provisioner.naive_baseline import NaiveAllocator
+    from model_provisioner.hexgen import HexGenAllocator
+    from model_provisioner.helix import HelixAllocator
+    from model_provisioner.milp import MILPAllocator
 
     from workflows import PODCAST_WORKFLOW
 

diff --git a/tests/simulator/test_data_loading.py b/tests/simulator/test_data_loading.py
@@ -4,14 +4,15 @@
 
 import sys
 import os
+from pathlib import Path
 import pytest
 
 # Add current path
 sys.path.append(os.getcwd())
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from sim_types import QualityLevel
 
     from data_loading import load_latency_data
@@ -59,3 +60,11 @@ def test_adaptive_quality() -> None:
             "simulator/data/",
             "nonexisting"
         )
+
+
+def test_default_data_dir_is_cwd_relative(monkeypatch: pytest.MonkeyPatch) -> None:
+    """The default ``data`` directory resolves against the current working directory."""
+    monkeypatch.chdir(Path(__file__).resolve().parents[2] / "simulator")
+
+    assert load_latency_data() is not None
+    assert load_power_data() is not None
diff --git a/tests/simulator/test_evaluator.py b/tests/simulator/test_evaluator.py
@@ -8,7 +8,7 @@
 from tests.test_utils import assert_equals_approx
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from constants import SECONDS_IN_HOUR
 
@@ -20,7 +20,7 @@
 
     from evaluator import evaluate_model_allocation
 
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
 
     from models import FluxModelAllocation
     from models import GemmaModelAllocation

diff --git a/tests/simulator/test_greedy.py b/tests/simulator/test_greedy.py
@@ -8,7 +8,7 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from constants import SECONDS_IN_HOUR
 
@@ -21,9 +21,9 @@
     from data_loading import load_latency_data
     from data_loading import load_power_data
 
-    from greedy import GreedyAllocator
+    from model_provisioner.greedy import GreedyAllocator
 
-    from policies import STREAMWISE_POLICY
+    from model_provisioner.policies import STREAMWISE_POLICY
 
 
 def test_allocate_8A_8H() -> None:

diff --git a/tests/simulator/test_helix.py b/tests/simulator/test_helix.py
@@ -12,16 +12,16 @@
 
 from tests.test_utils import temp_sys_path
 
-with temp_sys_path("simulator"):
+with temp_sys_path("simulator", "streamwise"):
     from constants import DEFAULT_WORKFLOW_CONFIG
     from sim_types import GPUType
     from sim_types import Model
     from sim_types import MODEL_ORDER
     from sim_types import Solver
     from data_loading import load_latency_data
     from data_loading import load_power_data
-    from helix import HelixAllocator
-    from policies import HELIX_POLICY
+    from model_provisioner.helix import HelixAllocator
+    from model_provisioner.policies import HELIX_POLICY
 
 
 def test_get_model_order() -> None: