Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ max-line-length = 120
# Ignore E402: module-level import not at top of file
# Ignore W503: line break before binary operator (incompatible with W504)
ignore = E402,W503
exclude = .venv
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
*.sln.docstates
*.env

# Environment files
.venv/

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

Expand Down
15 changes: 15 additions & 0 deletions simulator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Simulator package — provisioning sweeps, multi-request analysis, and plotting
on top of the model_provisioner allocation policies.

The allocation policy implementations live in ``streamwise/model_provisioner/``.
"""
import os
import sys

# Make model_provisioner importable for simulator modules by putting the
# sibling ``streamwise/`` directory (resolved relative to this file) on sys.path.
_STREAMWISE_DIR = os.path.normpath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "streamwise")
)
# Skip the insert when the directory is already listed, to avoid duplicate entries.
if _STREAMWISE_DIR not in sys.path:
    # Prepend so this directory is searched before the existing sys.path entries.
    sys.path.insert(0, _STREAMWISE_DIR)
Comment on lines +11 to +15
2 changes: 1 addition & 1 deletion simulator/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from sim_types import Objective
from sim_types import Policy

from policies import STREAMWISE_POLICY
from model_provisioner.policies import STREAMWISE_POLICY

from models import get_model_allocation

Expand Down
12 changes: 6 additions & 6 deletions simulator/auto_model_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from sim_types import GPUType
from sim_types import Result

from policies import STREAMWISE_POLICY
from model_provisioner.policies import STREAMWISE_POLICY

from model_allocator import ModelAllocator

Expand Down Expand Up @@ -47,39 +47,39 @@ def __init__(
def _build_allocator(self) -> ModelAllocator:
"""Create concrete allocator based on configured solver."""
if self.policy.solver == Solver.GREEDY:
from greedy import GreedyAllocator
from model_provisioner.greedy import GreedyAllocator
return GreedyAllocator(
workflow=self.workflow,
latency_data=self.latency_data,
power_data=self.power_data,
policy=self.policy,
)
if self.policy.solver == Solver.NAIVE:
from naive_baseline import NaiveAllocator
from model_provisioner.naive_baseline import NaiveAllocator
return NaiveAllocator(
workflow=self.workflow,
latency_data=self.latency_data,
power_data=self.power_data,
policy=self.policy,
)
if self.policy.solver in {Solver.GUROBI, Solver.HIGHS}:
from milp import MILPAllocator
from model_provisioner.milp import MILPAllocator
return MILPAllocator(
workflow=self.workflow,
latency_data=self.latency_data,
power_data=self.power_data,
policy=self.policy,
)
if self.policy.solver == Solver.HEXGEN:
from hexgen import HexGenAllocator
from model_provisioner.hexgen import HexGenAllocator
return HexGenAllocator(
workflow=self.workflow,
latency_data=self.latency_data,
power_data=self.power_data,
policy=self.policy,
)
if self.policy.solver == Solver.HELIX:
from helix import HelixAllocator
from model_provisioner.helix import HelixAllocator
return HelixAllocator(
workflow=self.workflow,
latency_data=self.latency_data,
Expand Down
12 changes: 7 additions & 5 deletions simulator/data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,17 @@
from constants import POWER_GPU_IDLE
from constants import POWER_GPU_TDP

_DEFAULT_DATA_DIR = Path("data")


def load_latency_data(
data_dir: str = "data/",
data_dir: str | Path = _DEFAULT_DATA_DIR,
) -> LatencyData:
"""
Load latency and throughput mapping data from CSV files.

Args:
data_dir (str): The directory where the CSV files are stored.
data_dir: The directory where the CSV files are stored.
Returns:
LatencyData: An object containing all loaded latency data.
"""
Expand Down Expand Up @@ -107,13 +109,13 @@ def load_latency_data(


def load_power_data(
data_dir: str = "data/"
data_dir: str | Path = _DEFAULT_DATA_DIR
) -> PowerData:
"""
Load power consumption data from CSV files.

Args:
data_dir (str): The directory where the CSV files are stored.
data_dir: The directory where the CSV files are stored.
Returns:
PowerData: An object containing all loaded power consumption data.
"""
Expand Down Expand Up @@ -216,7 +218,7 @@ def load_power_data(


def load_adaptive_quality_data(
data_dir: str,
data_dir: str | Path,
level: QualityLevel,
) -> LatencyData:
"""Load latency data for adaptive quality."""
Expand Down
2 changes: 1 addition & 1 deletion simulator/model_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from models import UpscalerModelAllocation
from models import OthersModelAllocation

from policies import NAIVE_POLICY
from model_provisioner.policies import NAIVE_POLICY


class ModelAllocator(ABC):
Expand Down
2 changes: 1 addition & 1 deletion simulator/multirequests.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from workflows import PODCAST_WORKFLOW

from policies import STREAMWISE_POLICY
from model_provisioner.policies import STREAMWISE_POLICY

from auto_model_allocator import AutoModelAllocator

Expand Down
2 changes: 1 addition & 1 deletion simulator/provisioning.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

from auto_model_allocator import AutoModelAllocator

from policies import STREAMWISE_POLICY
from model_provisioner.policies import STREAMWISE_POLICY

from constants import SECONDS_IN_HOUR

Expand Down
20 changes: 20 additions & 0 deletions streamwise/model_provisioner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""
Model Provisioner — allocation policy implementations for GPU resource distribution.

Contains greedy, naive, MILP, HexGen, and Helix allocation strategies.
The foundation types (sim_types, constants, models, etc.) live in simulator/.
"""
import os
import sys

# Add simulator/ to sys.path so policy files can import foundation modules.
# Supports both local dev layout (../../simulator) and Docker layout (../simulator).
_HERE = os.path.dirname(os.path.abspath(__file__))
# Candidates are listed in priority order; only the first existing one is used.
_CANDIDATES = [
    os.path.normpath(os.path.join(_HERE, "..", "..", "simulator")),
    os.path.normpath(os.path.join(_HERE, "..", "simulator")),
]
for _path in _CANDIDATES:
    if not os.path.isdir(_path):
        continue
    # Stop at the first layout that exists, even when it is already on sys.path;
    # otherwise a lower-priority candidate could be prepended and shadow it.
    if _path not in sys.path:
        sys.path.insert(0, _path)
    break
Comment on lines +10 to +20
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@

from model_allocator import ModelAllocator

from policies import STREAMWISE_POLICY
from policies import MAX_ITERATIONS
from policies import USE_ALL_GPUS
from .policies import STREAMWISE_POLICY
from .policies import MAX_ITERATIONS
from .policies import USE_ALL_GPUS

from actions import gen_actions
from actions import choose_action
Expand Down
6 changes: 3 additions & 3 deletions simulator/helix.py → streamwise/model_provisioner/helix.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@

from evaluator import evaluate_model_allocation

from milp import MILPAllocator
from .milp import MILPAllocator

from policies import HELIX_POLICY
from policies import MAX_DEVICES
from .policies import HELIX_POLICY
from .policies import MAX_DEVICES

from constants import DEVICE_OPTIONS

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@
from evaluator import calc_used_gpus
from evaluator import evaluate_model_allocation

from greedy import GreedyAllocator
from .greedy import GreedyAllocator

from actions import gen_actions
from actions import choose_action
from actions import apply_action

from policies import HEXGEN_POLICY
from policies import MAX_ITERATIONS
from policies import USE_ALL_GPUS
from .policies import HEXGEN_POLICY
from .policies import MAX_ITERATIONS
from .policies import USE_ALL_GPUS


def _get_model_order(workflow: WorkflowConfig) -> list[Model]:
Expand Down
2 changes: 1 addition & 1 deletion simulator/milp.py → streamwise/model_provisioner/milp.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
from constants import NUM_GPUS_PER_SERVER
from constants import SECONDS_IN_HOUR

from policies import STREAMWISE_MILP_POLICY
from .policies import STREAMWISE_MILP_POLICY


MAX_INSTANCES = 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@

from evaluator import evaluate_model_allocation

from policies import NAIVE_POLICY
from policies import MAX_DEVICES
from .policies import NAIVE_POLICY
from .policies import MAX_DEVICES

from model_allocator import ModelAllocator

Expand Down
File renamed without changes.
20 changes: 10 additions & 10 deletions tests/simulator/test_auto_model_allocator.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from tests.test_utils import temp_sys_path

with temp_sys_path("simulator"):
with temp_sys_path("simulator", "streamwise"):
from sim_types import GPUType
from sim_types import Model
from sim_types import QualityLevel
Expand All @@ -33,18 +33,18 @@

from data_loading import load_latency_data

from policies import STREAMWISE_POLICY
from policies import NAIVE_POLICY
from policies import HEXGEN_POLICY
from policies import HELIX_POLICY
from model_provisioner.policies import STREAMWISE_POLICY
from model_provisioner.policies import NAIVE_POLICY
from model_provisioner.policies import HEXGEN_POLICY
from model_provisioner.policies import HELIX_POLICY

from auto_model_allocator import AutoModelAllocator

from greedy import GreedyAllocator
from naive_baseline import NaiveAllocator
from hexgen import HexGenAllocator
from helix import HelixAllocator
from milp import MILPAllocator
from model_provisioner.greedy import GreedyAllocator
from model_provisioner.naive_baseline import NaiveAllocator
from model_provisioner.hexgen import HexGenAllocator
from model_provisioner.helix import HelixAllocator
from model_provisioner.milp import MILPAllocator

from workflows import PODCAST_WORKFLOW

Expand Down
11 changes: 10 additions & 1 deletion tests/simulator/test_data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@

import sys
import os
from pathlib import Path
import pytest

# Add current path
sys.path.append(os.getcwd())

from tests.test_utils import temp_sys_path

with temp_sys_path("simulator"):
with temp_sys_path("simulator", "streamwise"):
from sim_types import QualityLevel

from data_loading import load_latency_data
Expand Down Expand Up @@ -59,3 +60,11 @@ def test_adaptive_quality() -> None:
"simulator/data/",
"nonexisting"
)


def test_default_data_dir_is_cwd_relative(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that both loaders work with their default ``data_dir`` when run from ``simulator/``."""
    simulator_dir = Path(__file__).resolve().parents[2] / "simulator"
    # Change the working directory for the duration of this test only; the
    # loaders are then called without arguments so their default path is used.
    monkeypatch.chdir(simulator_dir)

    assert load_latency_data() is not None
    assert load_power_data() is not None
4 changes: 2 additions & 2 deletions tests/simulator/test_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from tests.test_utils import assert_equals_approx
from tests.test_utils import temp_sys_path

with temp_sys_path("simulator"):
with temp_sys_path("simulator", "streamwise"):
from constants import DEFAULT_WORKFLOW_CONFIG
from constants import SECONDS_IN_HOUR

Expand All @@ -20,7 +20,7 @@

from evaluator import evaluate_model_allocation

from policies import STREAMWISE_POLICY
from model_provisioner.policies import STREAMWISE_POLICY

from models import FluxModelAllocation
from models import GemmaModelAllocation
Expand Down
6 changes: 3 additions & 3 deletions tests/simulator/test_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from tests.test_utils import temp_sys_path

with temp_sys_path("simulator"):
with temp_sys_path("simulator", "streamwise"):
from constants import DEFAULT_WORKFLOW_CONFIG
from constants import SECONDS_IN_HOUR

Expand All @@ -21,9 +21,9 @@
from data_loading import load_latency_data
from data_loading import load_power_data

from greedy import GreedyAllocator
from model_provisioner.greedy import GreedyAllocator

from policies import STREAMWISE_POLICY
from model_provisioner.policies import STREAMWISE_POLICY


def test_allocate_8A_8H() -> None:
Expand Down
6 changes: 3 additions & 3 deletions tests/simulator/test_helix.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@

from tests.test_utils import temp_sys_path

with temp_sys_path("simulator"):
with temp_sys_path("simulator", "streamwise"):
from constants import DEFAULT_WORKFLOW_CONFIG
from sim_types import GPUType
from sim_types import Model
from sim_types import MODEL_ORDER
from sim_types import Solver
from data_loading import load_latency_data
from data_loading import load_power_data
from helix import HelixAllocator
from policies import HELIX_POLICY
from model_provisioner.helix import HelixAllocator
from model_provisioner.policies import HELIX_POLICY


def test_get_model_order() -> None:
Expand Down
Loading