Skip to content

Commit a63c110

Browse files
committed
Extend Modal runner timeout
1 parent 2fbe079 commit a63c110

2 files changed

Lines changed: 18 additions & 8 deletions

File tree

src/runners/modal_runner.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,8 @@
8585
"modal_runner_archs",
8686
)
8787

88+
MODAL_RUN_TIMEOUT_SECONDS = 60 * 60
89+
8890

8991
class TimeoutException(Exception):
9092
pass
@@ -111,7 +113,7 @@ def timeout_handler(signum, frame):
111113

112114
def modal_run_config( # noqa: C901
113115
config: dict,
114-
timeout_seconds: int = 600,
116+
timeout_seconds: int = MODAL_RUN_TIMEOUT_SECONDS,
115117
) -> FullResult:
116118
"""Modal version of run_pytorch_script, handling timeouts"""
117119
try:

src/runners/modal_runner_archs.py

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,21 @@
11
# This file contains wrapper functions for running
22
# Modal apps on specific devices. We will fix this later.
3-
from modal_runner import app, cuda_image, modal_run_config
3+
from modal_runner import MODAL_RUN_TIMEOUT_SECONDS, app, cuda_image, modal_run_config
44

55
gpus = ["T4", "L4", "L4:4", "A100-80GB", "H100!", "B200"]
66
for gpu in gpus:
77
gpu_slug = gpu.lower().split("-")[0].strip("!").replace(":", "x")
8-
app.function(gpu=gpu, image=cuda_image, name=f"run_cuda_script_{gpu_slug}", serialized=True)(
9-
modal_run_config
10-
)
11-
app.function(gpu=gpu, image=cuda_image, name=f"run_pytorch_script_{gpu_slug}", serialized=True)(
12-
modal_run_config
13-
)
8+
app.function(
9+
gpu=gpu,
10+
image=cuda_image,
11+
name=f"run_cuda_script_{gpu_slug}",
12+
serialized=True,
13+
timeout=MODAL_RUN_TIMEOUT_SECONDS,
14+
)(modal_run_config)
15+
app.function(
16+
gpu=gpu,
17+
image=cuda_image,
18+
name=f"run_pytorch_script_{gpu_slug}",
19+
serialized=True,
20+
timeout=MODAL_RUN_TIMEOUT_SECONDS,
21+
)(modal_run_config)

0 commit comments

Comments
 (0)