From bdb8c5a1570a759a8af413cc36b696c97d3f362c Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Sat, 14 Feb 2026 13:16:37 -0800 Subject: [PATCH 1/2] fix(docs): change idleTimeout from minutes to seconds user report, could be wrong - seems right though. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c6fe256c..7ade73fd 100644 --- a/README.md +++ b/README.md @@ -595,7 +595,7 @@ The following parameters can be used with `LiveServerless` (full remote code exe | `gpuCount` | Number of GPUs per worker | 1 | 1, 2, 4 | | `workersMin` | Minimum number of workers | 0 | Set to 1 for persistence | | `workersMax` | Maximum number of workers | 3 | Higher for more concurrency | -| `idleTimeout` | Minutes before scaling down | 5 | 10, 30, 60 | +| `idleTimeout` | Seconds before scaling down | 5 | 10, 30, 60 | | `env` | Environment variables | `None` | `{"HF_TOKEN": "xyz"}` | | `networkVolumeId` | Persistent storage ID | `None` | `"vol_abc123"` | | `executionTimeoutMs`| Max execution time (ms) | 0 (no limit) | 600000 (10 min) | From 2daef36939d628c3f3e8b43c84c1ea88a71ca9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dean=20Qui=C3=B1anola?= Date: Mon, 16 Feb 2026 16:26:09 -0800 Subject: [PATCH 2/2] fix: correct idleTimeout to 60 seconds across docs and code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documentation incorrectly stated idleTimeout was in minutes with default 5. The parameter is actually in seconds with default value of 60. Changes: - README.md: Update parameter table (minutes → seconds, default 5 → 60) - docs/Flash_SDK_Reference.md: Correct default value (300 → 60) - skeleton_template/README.md: Fix unit and default (minutes → seconds, 5 → 60) - serverless.py: Update default value (5 → 60) - Template workers: Update examples (idleTimeout=5 → idleTimeout=60) --- README.md | 2 +- docs/Flash_SDK_Reference.md | 2 +- src/runpod_flash/cli/utils/skeleton_template/README.md | 2 +- .../cli/utils/skeleton_template/workers/cpu/endpoint.py | 2 +- .../cli/utils/skeleton_template/workers/gpu/endpoint.py | 2 +- src/runpod_flash/core/resources/serverless.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 7ade73fd..8b9c3ea9 100644 --- a/README.md +++ b/README.md @@ -595,7 +595,7 @@ The following parameters can be used with `LiveServerless` (full remote code exe | `gpuCount` | Number of GPUs per worker | 1 | 1, 2, 4 | | `workersMin` | Minimum number of workers | 0 | Set to 1 for persistence | | `workersMax` | Maximum number of workers | 3 | Higher for more concurrency | -| `idleTimeout` | Seconds before scaling down | 5 | 10, 30, 60 | +| `idleTimeout` | Seconds before scaling down | 60 | 300, 600, 1800 | | `env` | Environment variables | `None` | `{"HF_TOKEN": "xyz"}` | | `networkVolumeId` | Persistent storage ID | `None` | `"vol_abc123"` | | `executionTimeoutMs`| Max execution time (ms) | 0 (no limit) | 600000 (10 min) | diff --git a/docs/Flash_SDK_Reference.md b/docs/Flash_SDK_Reference.md index 1dd65e83..d0da3632 100644 --- a/docs/Flash_SDK_Reference.md +++ b/docs/Flash_SDK_Reference.md @@ -124,7 +124,7 @@ class ResourceConfig: # Worker scaling workersMin: int = 0 # Minimum workers to maintain workersMax: int = 3 # Maximum workers allowed - idleTimeout: int = 300 # Seconds before idle worker terminates + idleTimeout: int = 60 # Seconds before idle worker terminates # Networking networkVolumeId: Optional[str] = None # Mount persistent storage diff --git a/src/runpod_flash/cli/utils/skeleton_template/README.md b/src/runpod_flash/cli/utils/skeleton_template/README.md index be7b8d55..6c4801e5 100644 --- a/src/runpod_flash/cli/utils/skeleton_template/README.md +++ b/src/runpod_flash/cli/utils/skeleton_template/README.md @@ -128,7 +128,7 @@ The `@remote` decorator transparently executes functions on serverless infrastru ### Resource Scaling Both workers scale to zero when idle to minimize costs: -- **idleTimeout**: Minutes before scaling down (default: 5) +- **idleTimeout**: Seconds before scaling down (default: 60) - **workersMin**: 0 = completely scales to zero - **workersMax**: Maximum concurrent workers diff --git a/src/runpod_flash/cli/utils/skeleton_template/workers/cpu/endpoint.py b/src/runpod_flash/cli/utils/skeleton_template/workers/cpu/endpoint.py index e025ed76..8161e5a7 100644 --- a/src/runpod_flash/cli/utils/skeleton_template/workers/cpu/endpoint.py +++ b/src/runpod_flash/cli/utils/skeleton_template/workers/cpu/endpoint.py @@ -4,7 +4,7 @@ name="cpu_worker", workersMin=0, workersMax=1, - idleTimeout=5, + idleTimeout=60, ) diff --git a/src/runpod_flash/cli/utils/skeleton_template/workers/gpu/endpoint.py b/src/runpod_flash/cli/utils/skeleton_template/workers/gpu/endpoint.py index fc2bae4e..f3c4466c 100644 --- a/src/runpod_flash/cli/utils/skeleton_template/workers/gpu/endpoint.py +++ b/src/runpod_flash/cli/utils/skeleton_template/workers/gpu/endpoint.py @@ -5,7 +5,7 @@ gpus=[GpuGroup.ANY], workersMin=0, workersMax=1, - idleTimeout=5, + idleTimeout=60, ) diff --git a/src/runpod_flash/core/resources/serverless.py b/src/runpod_flash/core/resources/serverless.py index fc87efa6..d2a0f7b0 100644 --- a/src/runpod_flash/core/resources/serverless.py +++ b/src/runpod_flash/core/resources/serverless.py @@ -156,7 +156,7 @@ class ServerlessResource(DeployableResource): # === Input Fields === executionTimeoutMs: Optional[int] = 0 gpuCount: Optional[int] = 1 - idleTimeout: Optional[int] = 5 + idleTimeout: Optional[int] = 60 instanceIds: Optional[List[CpuInstanceType]] = None locations: Optional[str] = None name: str